001 /*
002 Licensed to the Apache Software Foundation (ASF) under one
003 or more contributor license agreements. See the NOTICE file
004 distributed with this work for additional information
005 regarding copyright ownership. The ASF licenses this file
006 to you under the Apache License, Version 2.0 (the
007 "License"); you may not use this file except in compliance
008 with the License. You may obtain a copy of the License at
009
010 http://www.apache.org/licenses/LICENSE-2.0
011
012 Unless required by applicable law or agreed to in writing,
013 software distributed under the License is distributed on an
014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 KIND, either express or implied. See the License for the
016 specific language governing permissions and limitations
017 under the License.
018 */
019 package org.apache.wiki.parser;
020
021 import java.io.IOException;
022 import java.io.Reader;
023 import java.io.StringReader;
024 import java.text.MessageFormat;
025 import java.util.ArrayList;
026 import java.util.Arrays;
027 import java.util.Collection;
028 import java.util.Collections;
029 import java.util.Comparator;
030 import java.util.EmptyStackException;
031 import java.util.HashMap;
032 import java.util.Iterator;
033 import java.util.List;
034 import java.util.Map;
035 import java.util.Properties;
036 import java.util.ResourceBundle;
037 import java.util.Stack;
038
039 import javax.xml.transform.Result;
040
041 import org.apache.commons.lang.StringEscapeUtils;
042 import org.apache.commons.lang.StringUtils;
043 import org.apache.log4j.Logger;
044 import org.apache.oro.text.GlobCompiler;
045 import org.apache.oro.text.regex.MalformedPatternException;
046 import org.apache.oro.text.regex.MatchResult;
047 import org.apache.oro.text.regex.Pattern;
048 import org.apache.oro.text.regex.PatternCompiler;
049 import org.apache.oro.text.regex.PatternMatcher;
050 import org.apache.oro.text.regex.Perl5Compiler;
051 import org.apache.oro.text.regex.Perl5Matcher;
052 import org.apache.wiki.InternalWikiException;
053 import org.apache.wiki.StringTransmutator;
054 import org.apache.wiki.VariableManager;
055 import org.apache.wiki.WikiContext;
056 import org.apache.wiki.WikiPage;
057 import org.apache.wiki.api.exceptions.PluginException;
058 import org.apache.wiki.api.exceptions.ProviderException;
059 import org.apache.wiki.api.plugin.WikiPlugin;
060 import org.apache.wiki.attachment.Attachment;
061 import org.apache.wiki.attachment.AttachmentManager;
062 import org.apache.wiki.auth.WikiSecurityException;
063 import org.apache.wiki.auth.acl.Acl;
064 import org.apache.wiki.i18n.InternationalizationManager;
065 import org.apache.wiki.preferences.Preferences;
066 import org.apache.wiki.render.CleanTextRenderer;
067 import org.apache.wiki.render.RenderingManager;
068 import org.apache.wiki.util.TextUtil;
069 import org.jdom2.Attribute;
070 import org.jdom2.Content;
071 import org.jdom2.Element;
072 import org.jdom2.IllegalDataException;
073 import org.jdom2.ProcessingInstruction;
074 import org.jdom2.Verifier;
075
076 /**
077 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the
078 * heart and soul of JSPWiki : make sure you test properly anything that is added,
079 * or else it breaks down horribly.
080 *
081 * @since 2.4
082 */
083 public class JSPWikiMarkupParser extends MarkupParser {
084
085 /** Name of the outlink image; relative path to the JSPWiki directory. */
086 private static final String OUTLINK_IMAGE = "images/out.png";
087
088 /** The value for anchor element <tt>class</tt> attributes when used
089 * for wiki page (normal) links. The value is "wikipage". */
090 public static final String CLASS_WIKIPAGE = "wikipage";
091
092 /** The value for anchor element <tt>class</tt> attributes when used
093 * for edit page links. The value is "createpage". */
094 public static final String CLASS_EDITPAGE = "createpage";
095
096 /** The value for anchor element <tt>class</tt> attributes when used
097 * for interwiki page links. The value is "interwiki". */
098 public static final String CLASS_INTERWIKI = "interwiki";
099
100 protected static final int READ = 0;
101 protected static final int EDIT = 1;
102 protected static final int EMPTY = 2; // Empty message
103 protected static final int LOCAL = 3;
104 protected static final int LOCALREF = 4;
105 protected static final int IMAGE = 5;
106 protected static final int EXTERNAL = 6;
107 protected static final int INTERWIKI = 7;
108 protected static final int IMAGELINK = 8;
109 protected static final int IMAGEWIKILINK = 9;
110 protected static final int ATTACHMENT = 10;
111
112 private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );
113
114 private boolean m_isbold = false;
115 private boolean m_isitalic = false;
116 private boolean m_istable = false;
117 private boolean m_isPre = false;
118 private boolean m_isEscaping = false;
119 private boolean m_isdefinition = false;
120 private boolean m_isPreBlock = false;
121
122 /** Contains style information, in multiple forms. */
123 private Stack<Boolean> m_styleStack = new Stack<Boolean>();
124
125 // general list handling
126 private int m_genlistlevel = 0;
127 private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern
128 private boolean m_allowPHPWikiStyleLists = true;
129
130
131 private boolean m_isOpenParagraph = false;
132
133 /** Keeps image regexp Patterns */
134 private List<Pattern> m_inlineImagePatterns;
135
136 /** Parser for extended link functionality. */
137 private LinkParser m_linkParser = new LinkParser();
138
139 private PatternMatcher m_inlineMatcher = new Perl5Matcher();
140
141 /** Keeps track of any plain text that gets put in the Text nodes */
142 private StringBuilder m_plainTextBuf = new StringBuilder(20);
143
144 private Element m_currentElement;
145
146 /** Keep track of duplicate header names. */
147 private Map<String, Integer> m_titleSectionCounter = new HashMap<String, Integer>();
148
149 /** If true, consider CamelCase hyperlinks as well. */
150 public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";
151
152 /** If true, all hyperlinks are translated as well, regardless whether they
153 are surrounded by brackets. */
154 public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";
155
156 /** If true, all outward links (external links) have a small link image appended. */
157 public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";
158
159 /** If true, all outward attachment info links have a small link image appended. */
160 public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";
161
162 /** If set to "true", all external links are tagged with 'rel="nofollow"' */
163 public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";
164
165 /** If true, then considers CamelCase links as well. */
166 private boolean m_camelCaseLinks = false;
167
168 /** If true, then generate special output for wysiwyg editing in certain cases */
169 private boolean m_wysiwygEditorMode = false;
170
171 /** If true, consider URIs that have no brackets as well. */
172 // FIXME: Currently reserved, but not used.
173 private boolean m_plainUris = false;
174
175 /** If true, all outward links use a small link image. */
176 private boolean m_useOutlinkImage = true;
177
178 private boolean m_useAttachmentImage = true;
179
180 /** If true, allows raw HTML. */
181 private boolean m_allowHTML = false;
182
183 private boolean m_useRelNofollow = false;
184
185 private PatternCompiler m_compiler = new Perl5Compiler();
186
187 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";
188
189 private PatternMatcher m_camelCaseMatcher = new Perl5Matcher();
190 private Pattern m_camelCasePattern;
191
192 private int m_rowNum = 1;
193
194 private Heading m_lastHeading = null;
195
196 /**
197 * This list contains all IANA registered URI protocol
198 * types as of September 2004 + a few well-known extra types.
199 *
200 * JSPWiki recognises all of them as external links.
201 *
202 * This array is sorted during class load, so you can just dump
203 * here whatever you want in whatever order you want.
204 */
205 static final String[] EXTERNAL_LINKS = {
206 "http:", "ftp:", "https:", "mailto:",
207 "news:", "file:", "rtsp:", "mms:", "ldap:",
208 "gopher:", "nntp:", "telnet:", "wais:",
209 "prospero:", "z39.50s", "z39.50r", "vemmi:",
210 "imap:", "nfs:", "acap:", "tip:", "pop:",
211 "dav:", "opaquelocktoken:", "sip:", "sips:",
212 "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps",
213 "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:",
214 "h323:", "ipp:", "tftp:", "mupdate:", "pres:",
215 "im:", "mtqp", "smb:" };
216
217 private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";
218
219 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";
220
221 private static final String[] CLASS_TYPES =
222 {
223 CLASS_WIKIPAGE,
224 CLASS_EDITPAGE,
225 "",
226 "footnote",
227 "footnoteref",
228 "",
229 "external",
230 CLASS_INTERWIKI,
231 "external",
232 CLASS_WIKIPAGE,
233 "attachment"
234 };
235
236
237 /**
238 * This Comparator is used to find an external link from c_externalLinks. It
239 * checks if the link starts with the other arraythingie.
240 */
241 private static Comparator<String> c_startingComparator = new StartingComparator();
242
243 static
244 {
245 Arrays.sort( EXTERNAL_LINKS );
246 }
247
/**
 *  Creates a markup parser and reads its configuration (cached patterns
 *  and property settings) right away.
 *
 *  @param context The WikiContext which controls the parsing.
 *  @param in The Reader from which the wiki markup is read.
 */
public JSPWikiMarkupParser( WikiContext context, Reader in )
{
    super( context, in );

    initialize();
}
259
260 // FIXME: parsers should be pooled for better performance.
261 @SuppressWarnings("unchecked")
262 private void initialize()
263 {
264 PatternCompiler compiler = new GlobCompiler();
265 List<Pattern> compiledpatterns;
266
267 //
268 // We cache compiled patterns in the engine, since their creation is
269 // really expensive
270 //
271 compiledpatterns = (List<Pattern>)m_engine.getAttribute( INLINE_IMAGE_PATTERNS );
272
273 if( compiledpatterns == null )
274 {
275 compiledpatterns = new ArrayList<Pattern>(20);
276 Collection< String > ptrns = m_engine.getAllInlinedImagePatterns();
277
278 //
279 // Make them into Regexp Patterns. Unknown patterns
280 // are ignored.
281 //
282 for( Iterator< String > i = ptrns.iterator(); i.hasNext(); )
283 {
284 try
285 {
286 compiledpatterns.add( compiler.compile( i.next(),
287 GlobCompiler.DEFAULT_MASK|GlobCompiler.READ_ONLY_MASK ) );
288 }
289 catch( MalformedPatternException e )
290 {
291 log.error("Malformed pattern in properties: ", e );
292 }
293 }
294
295 m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns );
296 }
297
298 m_inlineImagePatterns = Collections.unmodifiableList(compiledpatterns);
299
300 m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN );
301 if( m_camelCasePattern == null )
302 {
303 try
304 {
305 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,
306 Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
307 }
308 catch( MalformedPatternException e )
309 {
310 log.fatal("Internal error: Someone put in a faulty pattern.",e);
311 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader");
312 }
313 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
314 }
315 //
316 // Set the properties.
317 //
318 Properties props = m_engine.getWikiProperties();
319
320 String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
321
322 if( cclinks != null )
323 {
324 m_camelCaseLinks = TextUtil.isPositive( cclinks );
325 }
326 else
327 {
328 m_camelCaseLinks = TextUtil.getBooleanProperty( props,
329 PROP_CAMELCASELINKS,
330 m_camelCaseLinks );
331 }
332
333
334
335 Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE );
336 if( wysiwygVariable != null )
337 {
338 m_wysiwygEditorMode = wysiwygVariable.booleanValue();
339 }
340
341 m_plainUris = getLocalBooleanProperty( m_context,
342 props,
343 PROP_PLAINURIS,
344 m_plainUris );
345 m_useOutlinkImage = getLocalBooleanProperty( m_context,
346 props,
347 PROP_USEOUTLINKIMAGE,
348 m_useOutlinkImage );
349 m_useAttachmentImage = getLocalBooleanProperty( m_context,
350 props,
351 PROP_USEATTACHMENTIMAGE,
352 m_useAttachmentImage );
353 m_allowHTML = getLocalBooleanProperty( m_context,
354 props,
355 MarkupParser.PROP_ALLOWHTML,
356 m_allowHTML );
357
358 m_useRelNofollow = getLocalBooleanProperty( m_context,
359 props,
360 PROP_USERELNOFOLLOW,
361 m_useRelNofollow );
362
363 if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null )
364 {
365 disableAccessRules();
366 }
367
368 m_context.getPage().setHasMetadata();
369 }
370
371 /**
372 * This is just a simple helper method which will first check the context
373 * if there is already an override in place, and if there is not,
374 * it will then check the given properties.
375 *
376 * @param context WikiContext to check first
377 * @param props Properties to check next
378 * @param key What key are we searching for?
379 * @param defValue Default value for the boolean
380 * @return True or false
381 */
382 private static boolean getLocalBooleanProperty( WikiContext context,
383 Properties props,
384 String key,
385 boolean defValue )
386 {
387 Object bool = context.getVariable(key);
388
389 if( bool != null )
390 {
391 return TextUtil.isPositive( (String) bool );
392 }
393
394 return TextUtil.getBooleanProperty( props, key, defValue );
395 }
396
397 /**
398 * Returns link name, if it exists; otherwise it returns null.
399 */
400 private String linkExists( String page )
401 {
402 try
403 {
404 if( page == null || page.length() == 0 ) return null;
405
406 return m_engine.getFinalPageName( page );
407 }
408 catch( ProviderException e )
409 {
410 log.warn("TranslatorReader got a faulty page name!",e);
411
412 return page; // FIXME: What would be the correct way to go back?
413 }
414 }
415
416 /**
417 * Calls a transmutator chain.
418 *
419 * @param list Chain to call
420 * @param text Text that should be passed to the mutate() method
421 * of each of the mutators in the chain.
422 * @return The result of the mutation.
423 */
424
425 protected String callMutatorChain( Collection list, String text )
426 {
427 if( list == null || list.size() == 0 )
428 {
429 return text;
430 }
431
432 for( Iterator i = list.iterator(); i.hasNext(); )
433 {
434 StringTransmutator m = (StringTransmutator) i.next();
435
436 text = m.mutate( m_context, text );
437 }
438
439 return text;
440 }
441
442 /**
443 * Calls the heading listeners.
444 *
445 * @param param A Heading object.
446 */
447 protected void callHeadingListenerChain( Heading param )
448 {
449 List list = m_headingListenerChain;
450
451 for( Iterator i = list.iterator(); i.hasNext(); )
452 {
453 HeadingListener h = (HeadingListener) i.next();
454
455 h.headingAdded( m_context, param );
456 }
457 }
458
459 /**
460 * Creates a JDOM anchor element. Can be overridden to change the URL creation,
461 * if you really know what you are doing.
462 *
463 * @param type One of the types above
464 * @param link URL to which to link to
465 * @param text Link text
466 * @param section If a particular section identifier is required.
467 * @return An A element.
468 * @since 2.4.78
469 */
470 protected Element createAnchor(int type, String link, String text, String section)
471 {
472 text = escapeHTMLEntities( text );
473 section = escapeHTMLEntities( section );
474 Element el = new Element("a");
475 el.setAttribute("class",CLASS_TYPES[type]);
476 el.setAttribute("href",link+section);
477 el.addContent(text);
478 return el;
479 }
480
481 private Element makeLink( int type, String link, String text, String section, Iterator attributes )
482 {
483 Element el = null;
484
485 if( text == null ) text = link;
486
487 text = callMutatorChain( m_linkMutators, text );
488
489 section = (section != null) ? ("#"+section) : "";
490
491 // Make sure we make a link name that can be accepted
492 // as a valid URL.
493
494 if( link.length() == 0 )
495 {
496 type = EMPTY;
497 }
498 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
499
500 switch(type)
501 {
502 case READ:
503 el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
504 break;
505
506 case EDIT:
507 el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
508 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
509 break;
510
511 case EMPTY:
512 el = new Element("u").addContent(text);
513 break;
514
515 //
516 // These two are for local references - footnotes and
517 // references to footnotes.
518 // We embed the page name (or whatever WikiContext gives us)
519 // to make sure the links are unique across Wiki.
520 //
521 case LOCALREF:
522 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
523 break;
524
525 case LOCAL:
526 el = new Element("a").setAttribute("class","footnote");
527 el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
528 el.addContent("["+text+"]");
529 break;
530
531 //
532 // With the image, external and interwiki types we need to
533 // make sure nobody can put in Javascript or something else
534 // annoying into the links themselves. We do this by preventing
535 // a haxor from stopping the link name short with quotes in
536 // fillBuffer().
537 //
538 case IMAGE:
539 el = new Element("img").setAttribute("class","inline");
540 el.setAttribute("src",link);
541 el.setAttribute("alt",text);
542 break;
543
544 case IMAGELINK:
545 el = new Element("img").setAttribute("class","inline");
546 el.setAttribute("src",link);
547 el.setAttribute("alt",text);
548 el = createAnchor(IMAGELINK,text,"","").addContent(el);
549 break;
550
551 case IMAGEWIKILINK:
552 String pagelink = m_context.getURL(WikiContext.VIEW,text);
553 el = new Element("img").setAttribute("class","inline");
554 el.setAttribute("src",link);
555 el.setAttribute("alt",text);
556 el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
557 break;
558
559 case EXTERNAL:
560 el = createAnchor( EXTERNAL, link, text, section );
561 if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
562 break;
563
564 case INTERWIKI:
565 el = createAnchor( INTERWIKI, link, text, section );
566 break;
567
568 case ATTACHMENT:
569 String attlink = m_context.getURL( WikiContext.ATTACH,
570 link );
571
572 String infolink = m_context.getURL( WikiContext.INFO,
573 link );
574
575 String imglink = m_context.getURL( WikiContext.NONE,
576 "images/attachment_small.png" );
577
578 el = createAnchor( ATTACHMENT, attlink, text, "" );
579
580 pushElement(el);
581 popElement(el.getName());
582
583 if( m_useAttachmentImage )
584 {
585 el = new Element("img").setAttribute("src",imglink);
586 el.setAttribute("border","0");
587 el.setAttribute("alt","(info)");
588
589 el = new Element("a").setAttribute("href",infolink).addContent(el);
590 el.setAttribute("class","infolink");
591 }
592 else
593 {
594 el = null;
595 }
596 break;
597
598 default:
599 break;
600 }
601
602 if( el != null && attributes != null )
603 {
604 while( attributes.hasNext() )
605 {
606 Attribute attr = (Attribute)attributes.next();
607 if( attr != null )
608 {
609 el.setAttribute(attr);
610 }
611 }
612 }
613
614 if( el != null )
615 {
616 flushPlainText();
617 m_currentElement.addContent( el );
618 }
619 return el;
620 }
621
622
623 /**
624 * Figures out if a link is an off-site link. This recognizes
625 * the most common protocols by checking how it starts.
626 *
627 * @param link The link to check.
628 * @return true, if this is a link outside of this wiki.
629 * @since 2.4
630 */
631
632 public static boolean isExternalLink( String link )
633 {
634 int idx = Arrays.binarySearch( EXTERNAL_LINKS, link,
635 c_startingComparator );
636
637 //
638 // We need to check here once again; otherwise we might
639 // get a match for something like "h".
640 //
641 if( idx >= 0 && link.startsWith(EXTERNAL_LINKS[idx]) ) return true;
642
643 return false;
644 }
645
646 /**
647 * Returns true, if the link in question is an access
648 * rule.
649 */
650 private static boolean isAccessRule( String link )
651 {
652 return link.startsWith("{ALLOW") || link.startsWith("{DENY");
653 }
654
655 /**
656 * Returns true if the link is really command to insert
657 * a plugin.
658 * <P>
659 * Currently we just check if the link starts with "{INSERT",
660 * or just plain "{" but not "{$".
661 *
662 * @param link Link text, i.e. the contents of text between [].
663 * @return True, if this link seems to be a command to insert a plugin here.
664 */
665 public static boolean isPluginLink( String link )
666 {
667 return link.startsWith( "{INSERT" ) ||
668 ( link.startsWith( "{" ) && !link.startsWith( "{$" ) );
669 }
670
671 /**
672 * Matches the given link to the list of image name patterns
673 * to determine whether it should be treated as an inline image
674 * or not.
675 */
676 private boolean isImageLink( String link )
677 {
678 if( m_inlineImages )
679 {
680 link = link.toLowerCase();
681
682 for( Iterator i = m_inlineImagePatterns.iterator(); i.hasNext(); )
683 {
684 if( m_inlineMatcher.matches( link, (Pattern) i.next() ) )
685 return true;
686 }
687 }
688
689 return false;
690 }
691
692 private static boolean isMetadata( String link )
693 {
694 return link.startsWith("{SET");
695 }
696
697 /**
698 * These are all of the HTML 4.01 block-level elements.
699 */
700 private static final String[] BLOCK_ELEMENTS = {
701 "address", "blockquote", "div", "dl", "fieldset", "form",
702 "h1", "h2", "h3", "h4", "h5", "h6",
703 "hr", "noscript", "ol", "p", "pre", "table", "ul"
704 };
705
706 private static boolean isBlockLevel( String name )
707 {
708 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
709 }
710
711 /**
712 * This method peeks ahead in the stream until EOL and returns the result.
713 * It will keep the buffers untouched.
714 *
715 * @return The string from the current position to the end of line.
716 */
717
718 // FIXME: Always returns an empty line, even if the stream is full.
719 private String peekAheadLine()
720 throws IOException
721 {
722 String s = readUntilEOL().toString();
723
724 if( s.length() > PUSHBACK_BUFFER_SIZE )
725 {
726 log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover...");
727 pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
728 }
729 else
730 {
731 try
732 {
733 pushBack( s );
734 }
735 catch( IOException e )
736 {
737 log.warn("Pushback failed: the line is probably too long. Attempting to recover.");
738 }
739 }
740 return s;
741 }
742
743
744 /**
745 * Writes HTML for error message. Does not add it to the document, you
746 * have to do it yourself.
747 *
748 * @param error The error string.
749 * @return An Element containing the error.
750 */
751
752 public static Element makeError( String error )
753 {
754 return new Element("span").setAttribute("class","error").addContent(error);
755 }
756
757 private int flushPlainText()
758 {
759 int numChars = m_plainTextBuf.length();
760
761 if( numChars > 0 )
762 {
763 String buf;
764
765 if( !m_allowHTML )
766 {
767 buf = escapeHTMLEntities(m_plainTextBuf.toString());
768 }
769 else
770 {
771 buf = m_plainTextBuf.toString();
772 }
773 //
774 // We must first empty the buffer because the side effect of
775 // calling makeCamelCaseLink() is to call this routine.
776 //
777
778 m_plainTextBuf = new StringBuilder(20);
779
780 try
781 {
782 //
783 // This is the heaviest part of parsing, and therefore we can
784 // do some optimization here.
785 //
786 // 1) Only when the length of the buffer is big enough, we try to do the match
787 //
788
789 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
790 {
791 // System.out.println("Buffer="+buf);
792
793 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
794 {
795 MatchResult result = m_camelCaseMatcher.getMatch();
796
797 String firstPart = buf.substring(0,result.beginOffset(0));
798 String prefix = result.group(1);
799
800 if( prefix == null ) prefix = "";
801
802 String camelCase = result.group(2);
803 String protocol = result.group(3);
804 String uri = protocol+result.group(4);
805 buf = buf.substring(result.endOffset(0));
806
807 m_currentElement.addContent( firstPart );
808
809 //
810 // Check if the user does not wish to do URL or WikiWord expansion
811 //
812 if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
813 {
814 if( prefix.endsWith("~") )
815 {
816 if( m_wysiwygEditorMode )
817 {
818 m_currentElement.addContent( "~" );
819 }
820 prefix = prefix.substring(0,prefix.length()-1);
821 }
822 if( camelCase != null )
823 {
824 m_currentElement.addContent( prefix+camelCase );
825 }
826 else if( protocol != null )
827 {
828 m_currentElement.addContent( prefix+uri );
829 }
830 continue;
831 }
832
833 //
834 // Fine, then let's check what kind of a link this was
835 // and emit the proper elements
836 //
837 if( protocol != null )
838 {
839 char c = uri.charAt(uri.length()-1);
840 if( c == '.' || c == ',' )
841 {
842 uri = uri.substring(0,uri.length()-1);
843 buf = c + buf;
844 }
845 // System.out.println("URI match "+uri);
846 m_currentElement.addContent( prefix );
847 makeDirectURILink( uri );
848 }
849 else
850 {
851 // System.out.println("Matched: '"+camelCase+"'");
852 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
853 // System.out.println("prefix="+prefix);
854 m_currentElement.addContent( prefix );
855
856 makeCamelCaseLink( camelCase );
857 }
858 }
859
860 m_currentElement.addContent( buf );
861 }
862 else
863 {
864 //
865 // No camelcase asked for, just add the elements
866 //
867 m_currentElement.addContent( buf );
868 }
869 }
870 catch( IllegalDataException e )
871 {
872 //
873 // Sometimes it's possible that illegal XML chars is added to the data.
874 // Here we make sure it does not stop parsing.
875 //
876 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
877 }
878 }
879
880 return numChars;
881 }
882
883 /**
884 * Escapes XML entities in a HTML-compatible way (i.e. does not escape
885 * entities that are already escaped).
886 *
887 * @param buf
888 * @return An escaped string.
889 */
890 private String escapeHTMLEntities(String buf)
891 {
892 StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
893
894 for( int i = 0; i < buf.length(); i++ )
895 {
896 char ch = buf.charAt(i);
897
898 if( ch == '<' )
899 {
900 tmpBuf.append("<");
901 }
902 else if( ch == '>' )
903 {
904 tmpBuf.append(">");
905 }
906 else if( ch == '\"' )
907 {
908 tmpBuf.append(""");
909 }
910 else if( ch == '&' )
911 {
912 //
913 // If the following is an XML entity reference (&#.*;) we'll
914 // leave it as it is; otherwise we'll replace it with an &
915 //
916
917 boolean isEntity = false;
918 StringBuilder entityBuf = new StringBuilder();
919
920 if( i < buf.length() -1 )
921 {
922 for( int j = i; j < buf.length(); j++ )
923 {
924 char ch2 = buf.charAt(j);
925
926 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
927 {
928 entityBuf.append(ch2);
929
930 if( ch2 == ';' )
931 {
932 isEntity = true;
933 break;
934 }
935 }
936 else
937 {
938 break;
939 }
940 }
941 }
942
943 if( isEntity )
944 {
945 tmpBuf.append( entityBuf );
946 i = i + entityBuf.length() - 1;
947 }
948 else
949 {
950 tmpBuf.append("&");
951 }
952
953 }
954 else
955 {
956 tmpBuf.append( ch );
957 }
958 }
959
960 return tmpBuf.toString();
961 }
962
963 private Element pushElement( Element e )
964 {
965 flushPlainText();
966 m_currentElement.addContent( e );
967 m_currentElement = e;
968
969 return e;
970 }
971
972 private Element addElement( Content e )
973 {
974 if( e != null )
975 {
976 flushPlainText();
977 m_currentElement.addContent( e );
978 }
979 return m_currentElement;
980 }
981
982 /**
983 * All elements that can be empty by the HTML DTD.
984 */
985 // Keep sorted.
986 private static final String[] EMPTY_ELEMENTS = {
987 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
988 };
989
990 /**
991 * Goes through the current element stack and pops all elements until this
992 * element is found - this essentially "closes" and element.
993 *
994 * @param s
995 * @return The new current element, or null, if there was no such element in the entire stack.
996 */
997 private Element popElement( String s )
998 {
999 int flushedBytes = flushPlainText();
1000
1001 Element currEl = m_currentElement;
1002
1003 while( currEl.getParentElement() != null )
1004 {
1005 if( currEl.getName().equals(s) && !currEl.isRootElement() )
1006 {
1007 m_currentElement = currEl.getParentElement();
1008
1009 //
1010 // Check if it's okay for this element to be empty. Then we will
1011 // trick the JDOM generator into not generating an empty element,
1012 // by putting an empty string between the tags. Yes, it's a kludge
1013 // but what'cha gonna do about it. :-)
1014 //
1015
1016 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
1017 {
1018 currEl.addContent("");
1019 }
1020
1021 return m_currentElement;
1022 }
1023
1024 currEl = currEl.getParentElement();
1025 }
1026
1027 return null;
1028 }
1029
1030
1031 /**
1032 * Reads the stream until it meets one of the specified
1033 * ending characters, or stream end. The ending character will be left
1034 * in the stream.
1035 */
1036 private String readUntil( String endChars )
1037 throws IOException
1038 {
1039 StringBuilder sb = new StringBuilder( 80 );
1040 int ch = nextToken();
1041
1042 while( ch != -1 )
1043 {
1044 if( ch == '\\' )
1045 {
1046 ch = nextToken();
1047 if( ch == -1 )
1048 {
1049 break;
1050 }
1051 }
1052 else
1053 {
1054 if( endChars.indexOf((char)ch) != -1 )
1055 {
1056 pushBack( ch );
1057 break;
1058 }
1059 }
1060 sb.append( (char) ch );
1061 ch = nextToken();
1062 }
1063
1064 return sb.toString();
1065 }
1066
1067 /**
1068 * Reads the stream while the characters that have been specified are
1069 * in the stream, returning then the result as a String.
1070 */
1071 private String readWhile( String endChars )
1072 throws IOException
1073 {
1074 StringBuilder sb = new StringBuilder( 80 );
1075 int ch = nextToken();
1076
1077 while( ch != -1 )
1078 {
1079 if( endChars.indexOf((char)ch) == -1 )
1080 {
1081 pushBack( ch );
1082 break;
1083 }
1084
1085 sb.append( (char) ch );
1086 ch = nextToken();
1087 }
1088
1089 return sb.toString();
1090 }
1091
1092 private JSPWikiMarkupParser m_cleanTranslator;
1093
1094 /**
1095 * Does a lazy init. Otherwise, we would get into a situation
1096 * where HTMLRenderer would try and boot a TranslatorReader before
1097 * the TranslatorReader it is contained by is up.
1098 */
1099 private JSPWikiMarkupParser getCleanTranslator()
1100 {
1101 if( m_cleanTranslator == null )
1102 {
1103 WikiContext dummyContext = new WikiContext( m_engine,
1104 m_context.getHttpRequest(),
1105 m_context.getPage() );
1106 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
1107
1108 m_cleanTranslator.m_allowHTML = true;
1109 }
1110
1111 return m_cleanTranslator;
1112 }
1113 /**
1114 * Modifies the "hd" parameter to contain proper values. Because
1115 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
1116 * % after url encoding with '_'.
1117 * <p>
1118 * Counts also duplicate headings (= headings with similar name), and
1119 * attaches a counter.
1120 */
1121 private String makeHeadingAnchor( String baseName, String title, Heading hd )
1122 {
1123 hd.m_titleText = title;
1124 title = MarkupParser.wikifyLink( title );
1125
1126 hd.m_titleSection = m_engine.encodeName(title);
1127
1128 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) )
1129 {
1130 Integer count = m_titleSectionCounter.get( hd.m_titleSection );
1131 count = count + 1;
1132 m_titleSectionCounter.put( hd.m_titleSection, count );
1133 hd.m_titleSection += "-" + count;
1134 }
1135 else
1136 {
1137 m_titleSectionCounter.put( hd.m_titleSection, 1 );
1138 }
1139
1140 hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+
1141 "-"+hd.m_titleSection;
1142 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
1143 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
1144
1145 return hd.m_titleAnchor;
1146 }
1147
1148 private String makeSectionTitle( String title )
1149 {
1150 title = title.trim();
1151 String outTitle;
1152
1153 try
1154 {
1155 JSPWikiMarkupParser dtr = getCleanTranslator();
1156 dtr.setInputReader( new StringReader(title) );
1157
1158 CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse());
1159
1160 outTitle = ctt.getString();
1161 }
1162 catch( IOException e )
1163 {
1164 log.fatal("CleanTranslator not working", e);
1165 throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() );
1166 }
1167
1168 return outTitle;
1169 }
1170
1171 /**
1172 * Returns XHTML for the heading.
1173 *
1174 * @param level The level of the heading. @see Heading
1175 * @param title the title for the heading
1176 * @param hd a List to which heading should be added
1177 * @return An Element containing the heading
1178 */
1179 public Element makeHeading( int level, String title, Heading hd )
1180 {
1181 Element el = null;
1182
1183 String pageName = m_context.getPage().getName();
1184
1185 String outTitle = makeSectionTitle( title );
1186
1187 hd.m_level = level;
1188
1189 switch( level )
1190 {
1191 case Heading.HEADING_SMALL:
1192 el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1193 break;
1194
1195 case Heading.HEADING_MEDIUM:
1196 el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1197 break;
1198
1199 case Heading.HEADING_LARGE:
1200 el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1201 break;
1202
1203 default:
1204 throw new InternalWikiException("Illegal heading type "+level);
1205 }
1206
1207
1208 return el;
1209 }
1210
1211 /**
1212 * When given a link to a WikiName, we just return
1213 * a proper HTML link for it. The local link mutator
1214 * chain is also called.
1215 */
1216 private Element makeCamelCaseLink( String wikiname )
1217 {
1218 String matchedLink;
1219
1220 callMutatorChain( m_localLinkMutatorChain, wikiname );
1221
1222 if( (matchedLink = linkExists( wikiname )) != null )
1223 {
1224 makeLink( READ, matchedLink, wikiname, null, null );
1225 }
1226 else
1227 {
1228 makeLink( EDIT, wikiname, wikiname, null, null );
1229 }
1230
1231 return m_currentElement;
1232 }
1233
1234 /** Holds the image URL for the duration of this parser */
1235 private String m_outlinkImageURL = null;
1236
1237 /**
1238 * Returns an element for the external link image (out.png). However,
1239 * this method caches the URL for the lifetime of this MarkupParser,
1240 * because it's commonly used, and we'll end up with possibly hundreds
1241 * our thousands of references to it... It's a lot faster, too.
1242 *
1243 * @return An element containing the HTML for the outlink image.
1244 */
1245 private Element outlinkImage()
1246 {
1247 Element el = null;
1248
1249 if( m_useOutlinkImage )
1250 {
1251 if( m_outlinkImageURL == null )
1252 {
1253 m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE );
1254 }
1255
1256 el = new Element("img").setAttribute("class", "outlink");
1257 el.setAttribute( "src", m_outlinkImageURL );
1258 el.setAttribute("alt","");
1259 }
1260
1261 return el;
1262 }
1263
1264 /**
1265 * Takes an URL and turns it into a regular wiki link. Unfortunately,
1266 * because of the way that flushPlainText() works, it already encodes
1267 * all of the XML entities. But so does WikiContext.getURL(), so we
1268 * have to do a reverse-replace here, so that it can again be replaced in makeLink.
1269 * <p>
1270 * What a crappy problem.
1271 *
1272 * @param url
1273 * @return An anchor Element containing the link.
1274 */
1275 private Element makeDirectURILink( String url )
1276 {
1277 Element result;
1278 String last = null;
1279
1280 if( url.endsWith(",") || url.endsWith(".") )
1281 {
1282 last = url.substring( url.length()-1 );
1283 url = url.substring( 0, url.length()-1 );
1284 }
1285
1286 callMutatorChain( m_externalLinkMutatorChain, url );
1287
1288 if( isImageLink( url ) )
1289 {
1290 result = handleImageLink( StringUtils.replace(url,"&","&"), url, false );
1291 }
1292 else
1293 {
1294 result = makeLink( EXTERNAL, StringUtils.replace(url,"&","&"), url, null, null );
1295 addElement( outlinkImage() );
1296 }
1297
1298 if( last != null )
1299 {
1300 m_plainTextBuf.append(last);
1301 }
1302
1303 return result;
1304 }
1305
1306 /**
1307 * Image links are handled differently:
1308 * 1. If the text is a WikiName of an existing page,
1309 * it gets linked.
1310 * 2. If the text is an external link, then it is inlined.
1311 * 3. Otherwise it becomes an ALT text.
1312 *
1313 * @param reallink The link to the image.
1314 * @param link Link text portion, may be a link to somewhere else.
1315 * @param hasLinkText If true, then the defined link had a link text available.
1316 * This means that the link text may be a link to a wiki page,
1317 * or an external resource.
1318 */
1319
1320 // FIXME: isExternalLink() is called twice.
1321 private Element handleImageLink( String reallink, String link, boolean hasLinkText )
1322 {
1323 String possiblePage = MarkupParser.cleanLink( link );
1324
1325 if( isExternalLink( link ) && hasLinkText )
1326 {
1327 return makeLink( IMAGELINK, reallink, link, null, null );
1328 }
1329 else if( ( linkExists( possiblePage ) ) != null &&
1330 hasLinkText )
1331 {
1332 // System.out.println("Orig="+link+", Matched: "+matchedLink);
1333 callMutatorChain( m_localLinkMutatorChain, possiblePage );
1334
1335 return makeLink( IMAGEWIKILINK, reallink, link, null, null );
1336 }
1337 else
1338 {
1339 return makeLink( IMAGE, reallink, link, null, null );
1340 }
1341 }
1342
1343 private Element handleAccessRule( String ruleLine )
1344 {
1345 if( m_wysiwygEditorMode )
1346 {
1347 m_currentElement.addContent( "[" + ruleLine + "]" );
1348 }
1349
1350 if( !m_parseAccessRules ) return m_currentElement;
1351 Acl acl;
1352 WikiPage page = m_context.getRealPage();
1353 // UserDatabase db = m_context.getEngine().getUserDatabase();
1354
1355 if( ruleLine.startsWith( "{" ) )
1356 ruleLine = ruleLine.substring( 1 );
1357 if( ruleLine.endsWith( "}" ) )
1358 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1359
1360 if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine);
1361
1362 try
1363 {
1364 acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1365
1366 page.setAcl( acl );
1367
1368 if( log.isDebugEnabled() ) log.debug( acl.toString() );
1369 }
1370 catch( WikiSecurityException wse )
1371 {
1372 return makeError( wse.getMessage() );
1373 }
1374
1375 return m_currentElement;
1376 }
1377
1378 /**
1379 * Handles metadata setting [{SET foo=bar}]
1380 */
1381 private Element handleMetadata( String link )
1382 {
1383 if( m_wysiwygEditorMode )
1384 {
1385 m_currentElement.addContent( "[" + link + "]" );
1386 }
1387
1388 try
1389 {
1390 String args = link.substring( link.indexOf(' '), link.length()-1 );
1391
1392 String name = args.substring( 0, args.indexOf('=') );
1393 String val = args.substring( args.indexOf('=')+1, args.length() );
1394
1395 name = name.trim();
1396 val = val.trim();
1397
1398 if( val.startsWith("'") ) val = val.substring( 1 );
1399 if( val.endsWith("'") ) val = val.substring( 0, val.length()-1 );
1400
1401 // log.debug("SET name='"+name+"', value='"+val+"'.");
1402
1403 if( name.length() > 0 && val.length() > 0 )
1404 {
1405 val = m_engine.getVariableManager().expandVariables( m_context,
1406 val );
1407
1408 m_context.getPage().setAttribute( name, val );
1409 }
1410 }
1411 catch( Exception e )
1412 {
1413 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1414 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1415 }
1416
1417 return m_currentElement;
1418 }
1419
1420 /**
1421 * Emits a processing instruction that will disable markup escaping. This is
1422 * very useful if you want to emit HTML directly into the stream.
1423 *
1424 */
1425 private void disableOutputEscaping()
1426 {
1427 addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") );
1428 }
1429
1430 /**
1431 * Gobbles up all hyperlinks that are encased in square brackets.
1432 */
1433 private Element handleHyperlinks( String linktext, int pos )
1434 {
1435 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1436
1437 StringBuilder sb = new StringBuilder(linktext.length()+80);
1438
1439 if( isAccessRule( linktext ) )
1440 {
1441 return handleAccessRule( linktext );
1442 }
1443
1444 if( isMetadata( linktext ) )
1445 {
1446 return handleMetadata( linktext );
1447 }
1448
1449 if( isPluginLink( linktext ) )
1450 {
1451 try
1452 {
1453 PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1454 //
1455 // This might sometimes fail, especially if there is something which looks
1456 // like a plugin invocation but is really not.
1457 //
1458 if( pluginContent != null )
1459 {
1460 addElement( pluginContent );
1461
1462 pluginContent.executeParse( m_context );
1463 }
1464 }
1465 catch( PluginException e )
1466 {
1467 log.info( "Failed to insert plugin: "+e.getMessage() );
1468 //log.info( "Root cause:",e.getRootThrowable() );
1469 if( !m_wysiwygEditorMode )
1470 {
1471 ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE );
1472 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), e.getMessage() ) ) );
1473 }
1474 }
1475
1476 return m_currentElement;
1477 }
1478
1479 try
1480 {
1481 LinkParser.Link link = m_linkParser.parse(linktext);
1482 linktext = link.getText();
1483 String linkref = link.getReference();
1484
1485 //
1486 // Yes, we now have the components separated.
1487 // linktext = the text the link should have
1488 // linkref = the url or page name.
1489 //
1490 // In many cases these are the same. [linktext|linkref].
1491 //
1492 if( VariableManager.isVariableLink( linktext ) )
1493 {
1494 Content el = new VariableContent(linktext);
1495
1496 addElement( el );
1497 }
1498 else if( isExternalLink( linkref ) )
1499 {
1500 // It's an external link, out of this Wiki
1501
1502 callMutatorChain( m_externalLinkMutatorChain, linkref );
1503
1504 if( isImageLink( linkref ) )
1505 {
1506 handleImageLink( linkref, linktext, link.hasReference() );
1507 }
1508 else
1509 {
1510 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1511 addElement( outlinkImage() );
1512 }
1513 }
1514 else if( link.isInterwikiLink() )
1515 {
1516 // It's an interwiki link
1517 // InterWiki links also get added to external link chain
1518 // after the links have been resolved.
1519
1520 // FIXME: There is an interesting issue here: We probably should
1521 // URLEncode the wikiPage, but we can't since some of the
1522 // Wikis use slashes (/), which won't survive URLEncoding.
1523 // Besides, we don't know which character set the other Wiki
1524 // is using, so you'll have to write the entire name as it appears
1525 // in the URL. Bugger.
1526
1527 String extWiki = link.getExternalWiki();
1528 String wikiPage = link.getExternalWikiPage();
1529
1530 if( m_wysiwygEditorMode )
1531 {
1532 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1533 }
1534 else
1535 {
1536 String urlReference = m_engine.getInterWikiURL( extWiki );
1537
1538 if( urlReference != null )
1539 {
1540 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1541 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1542
1543 if( isImageLink(urlReference) )
1544 {
1545 handleImageLink( urlReference, linktext, link.hasReference() );
1546 }
1547 else
1548 {
1549 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1550 }
1551
1552 if( isExternalLink(urlReference) )
1553 {
1554 addElement( outlinkImage() );
1555 }
1556 }
1557 else
1558 {
1559 Object[] args = { extWiki };
1560 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1561 }
1562 }
1563 }
1564 else if( linkref.startsWith("#") )
1565 {
1566 // It defines a local footnote
1567 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1568 }
1569 else if( TextUtil.isNumber( linkref ) )
1570 {
1571 // It defines a reference to a local footnote
1572 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1573 }
1574 else
1575 {
1576 int hashMark = -1;
1577
1578 //
1579 // Internal wiki link, but is it an attachment link?
1580 //
1581 String attachment = findAttachment( linkref );
1582 if( attachment != null )
1583 {
1584 callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1585
1586 if( isImageLink( linkref ) )
1587 {
1588 attachment = m_context.getURL( WikiContext.ATTACH, attachment );
1589 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1590 }
1591 else
1592 {
1593 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1594 }
1595 }
1596 else if( (hashMark = linkref.indexOf('#')) != -1 )
1597 {
1598 // It's an internal Wiki link, but to a named section
1599
1600 String namedSection = linkref.substring( hashMark+1 );
1601 linkref = linkref.substring( 0, hashMark );
1602
1603 linkref = MarkupParser.cleanLink( linkref );
1604
1605 callMutatorChain( m_localLinkMutatorChain, linkref );
1606
1607 String matchedLink;
1608 if( (matchedLink = linkExists( linkref )) != null )
1609 {
1610 String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection));
1611 sectref = sectref.replace('%', '_');
1612 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1613 }
1614 else
1615 {
1616 makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1617 }
1618 }
1619 else
1620 {
1621 // It's an internal Wiki link
1622 linkref = MarkupParser.cleanLink( linkref );
1623
1624 callMutatorChain( m_localLinkMutatorChain, linkref );
1625
1626 String matchedLink = linkExists( linkref );
1627
1628 if( matchedLink != null )
1629 {
1630 makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1631 }
1632 else
1633 {
1634 makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1635 }
1636 }
1637 }
1638 }
1639 catch( ParseException e )
1640 {
1641 log.info("Parser failure: ",e);
1642 Object[] args = { e.getMessage() };
1643 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1644 }
1645
1646 return m_currentElement;
1647 }
1648
1649 private String findAttachment( String linktext )
1650 {
1651 AttachmentManager mgr = m_engine.getAttachmentManager();
1652 Attachment att = null;
1653
1654 try
1655 {
1656 att = mgr.getAttachmentInfo( m_context, linktext );
1657 }
1658 catch( ProviderException e )
1659 {
1660 log.warn("Finding attachments failed: ",e);
1661 return null;
1662 }
1663
1664 if( att != null )
1665 {
1666 return att.getName();
1667 }
1668 else if( linktext.indexOf('/') != -1 )
1669 {
1670 return linktext;
1671 }
1672
1673 return null;
1674 }
1675
1676 /**
1677 * Pushes back any string that has been read. It will obviously
1678 * be pushed back in a reverse order.
1679 *
1680 * @since 2.1.77
1681 */
1682 private void pushBack( String s )
1683 throws IOException
1684 {
1685 for( int i = s.length()-1; i >= 0; i-- )
1686 {
1687 pushBack( s.charAt(i) );
1688 }
1689 }
1690
1691 private Element handleBackslash()
1692 throws IOException
1693 {
1694 int ch = nextToken();
1695
1696 if( ch == '\\' )
1697 {
1698 int ch2 = nextToken();
1699
1700 if( ch2 == '\\' )
1701 {
1702 pushElement( new Element("br").setAttribute("clear","all"));
1703 return popElement("br");
1704 }
1705
1706 pushBack( ch2 );
1707
1708 pushElement( new Element("br") );
1709 return popElement("br");
1710 }
1711
1712 pushBack( ch );
1713
1714 return null;
1715 }
1716
1717 private Element handleUnderscore()
1718 throws IOException
1719 {
1720 int ch = nextToken();
1721 Element el = null;
1722
1723 if( ch == '_' )
1724 {
1725 if( m_isbold )
1726 {
1727 el = popElement("b");
1728 }
1729 else
1730 {
1731 el = pushElement( new Element("b") );
1732 }
1733 m_isbold = !m_isbold;
1734 }
1735 else
1736 {
1737 pushBack( ch );
1738 }
1739
1740 return el;
1741 }
1742
1743
1744 /**
1745 * For example: italics.
1746 */
1747 private Element handleApostrophe()
1748 throws IOException
1749 {
1750 int ch = nextToken();
1751 Element el = null;
1752
1753 if( ch == '\'' )
1754 {
1755 if( m_isitalic )
1756 {
1757 el = popElement("i");
1758 }
1759 else
1760 {
1761 el = pushElement( new Element("i") );
1762 }
1763 m_isitalic = !m_isitalic;
1764 }
1765 else
1766 {
1767 pushBack( ch );
1768 }
1769
1770 return el;
1771 }
1772
1773 private Element handleOpenbrace( boolean isBlock )
1774 throws IOException
1775 {
1776 int ch = nextToken();
1777
1778 if( ch == '{' )
1779 {
1780 int ch2 = nextToken();
1781
1782 if( ch2 == '{' )
1783 {
1784 m_isPre = true;
1785 m_isEscaping = true;
1786 m_isPreBlock = isBlock;
1787
1788 if( isBlock )
1789 {
1790 startBlockLevel();
1791 return pushElement( new Element("pre") );
1792 }
1793
1794 return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") );
1795 }
1796
1797 pushBack( ch2 );
1798
1799 return pushElement( new Element("tt") );
1800 }
1801
1802 pushBack( ch );
1803
1804 return null;
1805 }
1806
1807 /**
1808 * Handles both }} and }}}
1809 */
1810 private Element handleClosebrace()
1811 throws IOException
1812 {
1813 int ch2 = nextToken();
1814
1815 if( ch2 == '}' )
1816 {
1817 int ch3 = nextToken();
1818
1819 if( ch3 == '}' )
1820 {
1821 if( m_isPre )
1822 {
1823 if( m_isPreBlock )
1824 {
1825 popElement( "pre" );
1826 }
1827 else
1828 {
1829 popElement( "span" );
1830 }
1831
1832 m_isPre = false;
1833 m_isEscaping = false;
1834 return m_currentElement;
1835 }
1836
1837 m_plainTextBuf.append("}}}");
1838 return m_currentElement;
1839 }
1840
1841 pushBack( ch3 );
1842
1843 if( !m_isEscaping )
1844 {
1845 return popElement("tt");
1846 }
1847 }
1848
1849 pushBack( ch2 );
1850
1851 return null;
1852 }
1853
1854 private Element handleDash()
1855 throws IOException
1856 {
1857 int ch = nextToken();
1858
1859 if( ch == '-' )
1860 {
1861 int ch2 = nextToken();
1862
1863 if( ch2 == '-' )
1864 {
1865 int ch3 = nextToken();
1866
1867 if( ch3 == '-' )
1868 {
1869 // Empty away all the rest of the dashes.
1870 // Do not forget to return the first non-match back.
1871 do
1872 {
1873 ch = nextToken();
1874 }
1875 while ( ch == '-' );
1876
1877 pushBack(ch);
1878 startBlockLevel();
1879 pushElement( new Element("hr") );
1880 return popElement( "hr" );
1881 }
1882
1883 pushBack( ch3 );
1884 }
1885 pushBack( ch2 );
1886 }
1887
1888 pushBack( ch );
1889
1890 return null;
1891 }
1892
1893 private Element handleHeading()
1894 throws IOException
1895 {
1896 Element el = null;
1897
1898 int ch = nextToken();
1899
1900 Heading hd = new Heading();
1901
1902 if( ch == '!' )
1903 {
1904 int ch2 = nextToken();
1905
1906 if( ch2 == '!' )
1907 {
1908 String title = peekAheadLine();
1909
1910 el = makeHeading( Heading.HEADING_LARGE, title, hd);
1911 }
1912 else
1913 {
1914 pushBack( ch2 );
1915 String title = peekAheadLine();
1916 el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1917 }
1918 }
1919 else
1920 {
1921 pushBack( ch );
1922 String title = peekAheadLine();
1923 el = makeHeading( Heading.HEADING_SMALL, title, hd );
1924 }
1925
1926 callHeadingListenerChain( hd );
1927
1928 m_lastHeading = hd;
1929
1930 if( el != null ) pushElement(el);
1931
1932 return el;
1933 }
1934
1935 /**
1936 * Reads the stream until the next EOL or EOF. Note that it will also read the
1937 * EOL from the stream.
1938 */
1939 private StringBuilder readUntilEOL()
1940 throws IOException
1941 {
1942 int ch;
1943 StringBuilder buf = new StringBuilder( 256 );
1944
1945 while( true )
1946 {
1947 ch = nextToken();
1948
1949 if( ch == -1 )
1950 break;
1951
1952 buf.append( (char) ch );
1953
1954 if( ch == '\n' )
1955 break;
1956 }
1957 return buf;
1958 }
1959
1960 /** Controls whether italic is restarted after a paragraph shift */
1961
1962 private boolean m_restartitalic = false;
1963 private boolean m_restartbold = false;
1964
1965 private boolean m_newLine;
1966
1967 /**
1968 * Starts a block level element, therefore closing
1969 * a potential open paragraph tag.
1970 */
1971 private void startBlockLevel()
1972 {
1973 // These may not continue over block level limits in XHTML
1974
1975 popElement("i");
1976 popElement("b");
1977 popElement("tt");
1978
1979 if( m_isOpenParagraph )
1980 {
1981 m_isOpenParagraph = false;
1982 popElement("p");
1983 m_plainTextBuf.append("\n"); // Just small beautification
1984 }
1985
1986 m_restartitalic = m_isitalic;
1987 m_restartbold = m_isbold;
1988
1989 m_isitalic = false;
1990 m_isbold = false;
1991 }
1992
1993 private static String getListType( char c )
1994 {
1995 if( c == '*' )
1996 {
1997 return "ul";
1998 }
1999 else if( c == '#' )
2000 {
2001 return "ol";
2002 }
2003 throw new InternalWikiException("Parser got faulty list type: "+c);
2004 }
2005 /**
2006 * Like original handleOrderedList() and handleUnorderedList()
2007 * however handles both ordered ('#') and unordered ('*') mixed together.
2008 */
2009
2010 // FIXME: Refactor this; it's a bit messy.
2011
2012 private Element handleGeneralList()
2013 throws IOException
2014 {
2015 startBlockLevel();
2016
2017 String strBullets = readWhile( "*#" );
2018 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution
2019 int numBullets = strBullets.length();
2020
2021 // override the beginning portion of bullet pattern to be like the previous
2022 // to simulate PHPWiki style lists
2023
2024 if(m_allowPHPWikiStyleLists)
2025 {
2026 // only substitute if different
2027 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
2028 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
2029 {
2030 if(numBullets <= m_genlistlevel)
2031 {
2032 // Substitute all but the last character (keep the expressed bullet preference)
2033 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
2034 + strBullets.substring(numBullets-1, numBullets);
2035 }
2036 else
2037 {
2038 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
2039 }
2040 }
2041 }
2042
2043 //
2044 // Check if this is still of the same type
2045 //
2046 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
2047 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
2048 {
2049 if( numBullets > m_genlistlevel )
2050 {
2051 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );
2052
2053 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
2054 {
2055 // bullets are growing, get from new bullet list
2056 pushElement( new Element("li") );
2057 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
2058 }
2059 }
2060 else if( numBullets < m_genlistlevel )
2061 {
2062 // Close the previous list item.
2063 // buf.append( m_renderer.closeListItem() );
2064 popElement( "li" );
2065
2066 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
2067 {
2068 // bullets are shrinking, get from old bullet list
2069
2070 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
2071 if( m_genlistlevel > 0 )
2072 {
2073 popElement( "li" );
2074 }
2075
2076 }
2077 }
2078 else
2079 {
2080 if( m_genlistlevel > 0 )
2081 {
2082 popElement( "li" );
2083 }
2084 }
2085 }
2086 else
2087 {
2088 //
2089 // The pattern has changed, unwind and restart
2090 //
2091 int numEqualBullets;
2092 int numCheckBullets;
2093
2094 // find out how much is the same
2095 numEqualBullets = 0;
2096 numCheckBullets = Math.min(numBullets,m_genlistlevel);
2097
2098 while( numEqualBullets < numCheckBullets )
2099 {
2100 // if the bullets are equal so far, keep going
2101 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
2102 numEqualBullets++;
2103 // otherwise giveup, we have found how many are equal
2104 else
2105 break;
2106 }
2107
2108 //unwind
2109 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
2110 {
2111 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
2112 if( m_genlistlevel > numBullets )
2113 {
2114 popElement("li");
2115 }
2116 }
2117
2118 //rewind
2119
2120 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
2121 for(int i = numEqualBullets; i < numBullets; i++)
2122 {
2123 pushElement( new Element("li") );
2124 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
2125 }
2126 m_genlistlevel = numBullets;
2127 }
2128
2129 //
2130 // Push a new list item, and eat away any extra whitespace
2131 //
2132 pushElement( new Element("li") );
2133 readWhile(" ");
2134
2135 // work done, remember the new bullet list (in place of old one)
2136 m_genlistBulletBuffer.setLength(0);
2137 m_genlistBulletBuffer.append(strBullets);
2138
2139 return m_currentElement;
2140 }
2141
2142 private Element unwindGeneralList()
2143 {
2144 //unwind
2145 for( ; m_genlistlevel > 0; m_genlistlevel-- )
2146 {
2147 popElement( "li" );
2148 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
2149 }
2150
2151 m_genlistBulletBuffer.setLength(0);
2152
2153 return null;
2154 }
2155
2156
2157 private Element handleDefinitionList()
2158 throws IOException
2159 {
2160 if( !m_isdefinition )
2161 {
2162 m_isdefinition = true;
2163
2164 startBlockLevel();
2165
2166 pushElement( new Element("dl") );
2167 return pushElement( new Element("dt") );
2168 }
2169
2170 return null;
2171 }
2172
2173 private Element handleOpenbracket()
2174 throws IOException
2175 {
2176 StringBuilder sb = new StringBuilder(40);
2177 int pos = getPosition();
2178 int ch = nextToken();
2179 boolean isPlugin = false;
2180
2181 if( ch == '[' )
2182 {
2183 if( m_wysiwygEditorMode )
2184 {
2185 sb.append( '[' );
2186 }
2187
2188 sb.append( (char)ch );
2189
2190 while( (ch = nextToken()) == '[' )
2191 {
2192 sb.append( (char)ch );
2193 }
2194 }
2195
2196
2197 if( ch == '{' )
2198 {
2199 isPlugin = true;
2200 }
2201
2202 pushBack( ch );
2203
2204 if( sb.length() > 0 )
2205 {
2206 m_plainTextBuf.append( sb );
2207 return m_currentElement;
2208 }
2209
2210 //
2211 // Find end of hyperlink
2212 //
2213
2214 ch = nextToken();
2215 int nesting = 1; // Check for nested plugins
2216
2217 while( ch != -1 )
2218 {
2219 int ch2 = nextToken(); pushBack(ch2);
2220
2221 if( isPlugin )
2222 {
2223 if( ch == '[' && ch2 == '{' )
2224 {
2225 nesting++;
2226 }
2227 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
2228 {
2229 break;
2230 }
2231 else if( ch == '}' && ch2 == ']' )
2232 {
2233 // NB: This will be decremented once at the end
2234 nesting--;
2235 }
2236 }
2237 else
2238 {
2239 if( ch == ']' )
2240 {
2241 break;
2242 }
2243 }
2244
2245 sb.append( (char) ch );
2246
2247 ch = nextToken();
2248 }
2249
2250 //
2251 // If the link is never finished, do some tricks to display the rest of the line
2252 // unchanged.
2253 //
2254 if( ch == -1 )
2255 {
2256 log.debug("Warning: unterminated link detected!");
2257 m_isEscaping = true;
2258 m_plainTextBuf.append( sb );
2259 flushPlainText();
2260 m_isEscaping = false;
2261 return m_currentElement;
2262 }
2263
2264 return handleHyperlinks( sb.toString(), pos );
2265 }
2266
2267 /**
2268 * Reads the stream until the current brace is closed or stream end.
2269 */
2270 private String readBraceContent( char opening, char closing )
2271 throws IOException
2272 {
2273 StringBuilder sb = new StringBuilder(40);
2274 int braceLevel = 1;
2275 int ch;
2276 while(( ch = nextToken() ) != -1 )
2277 {
2278 if( ch == '\\' )
2279 {
2280 continue;
2281 }
2282 else if ( ch == opening )
2283 {
2284 braceLevel++;
2285 }
2286 else if ( ch == closing )
2287 {
2288 braceLevel--;
2289 if (braceLevel==0)
2290 {
2291 break;
2292 }
2293 }
2294 sb.append( (char)ch );
2295 }
2296 return sb.toString();
2297 }
2298
2299
2300 /**
2301 * Handles constructs of type %%(style) and %%class
2302 * @param newLine
2303 * @return An Element containing the div or span, depending on the situation.
2304 * @throws IOException
2305 */
2306 private Element handleDiv( boolean newLine )
2307 throws IOException
2308 {
2309 int ch = nextToken();
2310 Element el = null;
2311
2312 if( ch == '%' )
2313 {
2314 String style = null;
2315 String clazz = null;
2316
2317 ch = nextToken();
2318
2319 //
2320 // Style or class?
2321 //
2322 if( ch == '(' )
2323 {
2324 style = readBraceContent('(',')');
2325 }
2326 else if( Character.isLetter( (char) ch ) )
2327 {
2328 pushBack( ch );
2329 clazz = readUntil( " \t\n\r" );
2330 ch = nextToken();
2331
2332 //
2333 // Pop out only spaces, so that the upcoming EOL check does not check the
2334 // next line.
2335 //
2336 if( ch == '\n' || ch == '\r' )
2337 {
2338 pushBack(ch);
2339 }
2340 }
2341 else
2342 {
2343 //
2344 // Anything else stops.
2345 //
2346
2347 pushBack(ch);
2348
2349 try
2350 {
2351 Boolean isSpan = m_styleStack.pop();
2352
2353 if( isSpan == null )
2354 {
2355 // Fail quietly
2356 }
2357 else if( isSpan.booleanValue() )
2358 {
2359 el = popElement( "span" );
2360 }
2361 else
2362 {
2363 el = popElement( "div" );
2364 }
2365 }
2366 catch( EmptyStackException e )
2367 {
2368 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
2369 return m_currentElement;
2370 }
2371
2372 return el;
2373 }
2374
2375 //
2376 // Check if there is an attempt to do something nasty
2377 //
2378
2379 try
2380 {
2381 style = StringEscapeUtils.unescapeHtml(style);
2382 if( style != null && style.indexOf("javascript:") != -1 )
2383 {
2384 log.debug("Attempt to output javascript within CSS:"+style);
2385 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2386 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
2387 }
2388 }
2389 catch( NumberFormatException e )
2390 {
2391 //
2392 // If there are unknown entities, we don't want the parser to stop.
2393 //
2394 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2395 String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
2396 return addElement( makeError( msg ) );
2397 }
2398
2399 //
2400 // Decide if we should open a div or a span?
2401 //
2402 String eol = peekAheadLine();
2403
2404 if( eol.trim().length() > 0 )
2405 {
2406 // There is stuff after the class
2407
2408 el = new Element("span");
2409
2410 m_styleStack.push( Boolean.TRUE );
2411 }
2412 else
2413 {
2414 startBlockLevel();
2415 el = new Element("div");
2416 m_styleStack.push( Boolean.FALSE );
2417 }
2418
2419 if( style != null ) el.setAttribute("style", style);
2420 if( clazz != null ) el.setAttribute("class", clazz );
2421 el = pushElement( el );
2422
2423 return el;
2424 }
2425
2426 pushBack(ch);
2427
2428 return el;
2429 }
2430
2431 private Element handleSlash( boolean newLine )
2432 throws IOException
2433 {
2434 int ch = nextToken();
2435
2436 pushBack(ch);
2437 if( ch == '%' && !m_styleStack.isEmpty() )
2438 {
2439 return handleDiv( newLine );
2440 }
2441
2442 return null;
2443 }
2444
2445 private Element handleBar( boolean newLine )
2446 throws IOException
2447 {
2448 Element el = null;
2449
2450 if( !m_istable && !newLine )
2451 {
2452 return null;
2453 }
2454
2455 //
2456 // If the bar is in the first column, we will either start
2457 // a new table or continue the old one.
2458 //
2459
2460 if( newLine )
2461 {
2462 if( !m_istable )
2463 {
2464 startBlockLevel();
2465 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2466 m_istable = true;
2467 m_rowNum = 0;
2468 }
2469
2470 m_rowNum++;
2471 Element tr = ( m_rowNum % 2 != 0 )
2472 ? new Element("tr").setAttribute("class", "odd")
2473 : new Element("tr");
2474 el = pushElement( tr );
2475 }
2476
2477 //
2478 // Check out which table cell element to start;
2479 // a header element (th) or a regular element (td).
2480 //
2481 int ch = nextToken();
2482
2483 if( ch == '|' )
2484 {
2485 if( !newLine )
2486 {
2487 el = popElement("th");
2488 if( el == null ) popElement("td");
2489 }
2490 el = pushElement( new Element("th") );
2491 }
2492 else
2493 {
2494 if( !newLine )
2495 {
2496 el = popElement("td");
2497 if( el == null ) popElement("th");
2498 }
2499
2500 el = pushElement( new Element("td") );
2501
2502 pushBack( ch );
2503 }
2504
2505 return el;
2506 }
2507
2508 /**
2509 * Generic escape of next character or entity.
2510 */
2511 private Element handleTilde()
2512 throws IOException
2513 {
2514 int ch = nextToken();
2515
2516 if( ch == ' ' )
2517 {
2518 if( m_wysiwygEditorMode )
2519 {
2520 m_plainTextBuf.append( "~ " );
2521 }
2522 return m_currentElement;
2523 }
2524
2525 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2526 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2527 ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2528 {
2529 if( m_wysiwygEditorMode )
2530 {
2531 m_plainTextBuf.append( '~' );
2532 }
2533
2534 m_plainTextBuf.append( (char)ch );
2535 m_plainTextBuf.append(readWhile( ""+(char)ch ));
2536 return m_currentElement;
2537 }
2538
2539 // No escape.
2540 pushBack( ch );
2541
2542 return null;
2543 }
2544
2545 private void fillBuffer( Element startElement )
2546 throws IOException
2547 {
2548 m_currentElement = startElement;
2549
2550 boolean quitReading = false;
2551 m_newLine = true;
2552 disableOutputEscaping();
2553
2554 while(!quitReading)
2555 {
2556 int ch = nextToken();
2557
2558 if( ch == -1 ) break;
2559
2560 //
2561 // Check if we're actually ending the preformatted mode.
2562 // We still must do an entity transformation here.
2563 //
2564 if( m_isEscaping )
2565 {
2566 if( ch == '}' )
2567 {
2568 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2569 }
2570 else if( ch == -1 )
2571 {
2572 quitReading = true;
2573 }
2574 else if( ch == '\r' )
2575 {
2576 // DOS line feeds we ignore.
2577 }
2578 else if( ch == '<' )
2579 {
2580 m_plainTextBuf.append( "<" );
2581 }
2582 else if( ch == '>' )
2583 {
2584 m_plainTextBuf.append( ">" );
2585 }
2586 else if( ch == '&' )
2587 {
2588 m_plainTextBuf.append( "&" );
2589 }
2590 else if( ch == '~' )
2591 {
2592 String braces = readWhile("}");
2593 if( braces.length() >= 3 )
2594 {
2595 m_plainTextBuf.append("}}}");
2596
2597 braces = braces.substring(3);
2598 }
2599 else
2600 {
2601 m_plainTextBuf.append( (char) ch );
2602 }
2603
2604 for( int i = braces.length()-1; i >= 0; i-- )
2605 {
2606 pushBack(braces.charAt(i));
2607 }
2608 }
2609 else
2610 {
2611 m_plainTextBuf.append( (char) ch );
2612 }
2613
2614 continue;
2615 }
2616
2617 //
2618 // An empty line stops a list
2619 //
2620 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2621 {
2622 m_plainTextBuf.append(unwindGeneralList());
2623 }
2624
2625 if( m_newLine && ch != '|' && m_istable )
2626 {
2627 popElement("table");
2628 m_istable = false;
2629 }
2630
2631 int skip = IGNORE;
2632
2633 //
2634 // Do the actual parsing and catch any errors.
2635 //
2636 try
2637 {
2638 skip = parseToken( ch );
2639 }
2640 catch( IllegalDataException e )
2641 {
2642 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2643
2644 makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2645 }
2646
2647 //
2648 // The idea is as follows: If the handler method returns
2649 // an element (el != null), it is assumed that it has been
2650 // added in the stack. Otherwise the character is added
2651 // as is to the plaintext buffer.
2652 //
2653 // For the transition phase, if s != null, it also gets
2654 // added in the plaintext buffer.
2655 //
2656
2657 switch( skip )
2658 {
2659 case ELEMENT:
2660 m_newLine = false;
2661 break;
2662
2663 case CHARACTER:
2664 m_plainTextBuf.append( (char) ch );
2665 m_newLine = false;
2666 break;
2667
2668 case IGNORE:
2669 default:
2670 break;
2671 }
2672 }
2673
2674 closeHeadings();
2675 popElement("domroot");
2676 }
2677
2678 private String cleanupSuspectData( String s )
2679 {
2680 StringBuilder sb = new StringBuilder( s.length() );
2681
2682 for( int i = 0; i < s.length(); i++ )
2683 {
2684 char c = s.charAt(i);
2685
2686 if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2687 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2688 }
2689
2690 return sb.toString();
2691 }
2692
2693 /** The token is a plain character. */
2694 protected static final int CHARACTER = 0;
2695
2696 /** The token is a wikimarkup element. */
2697 protected static final int ELEMENT = 1;
2698
2699 /** The token is to be ignored. */
2700 protected static final int IGNORE = 2;
2701
2702 /**
2703 * Return CHARACTER, if you think this was a plain character; ELEMENT, if
2704 * you think this was a wiki markup element, and IGNORE, if you think
2705 * we should ignore this altogether.
2706 * <p>
2707 * To add your own MarkupParser, you can override this method, but it
2708 * is recommended that you call super.parseToken() as well to gain advantage
2709 * of JSPWiki's own markup. You can call it at the start of your own
2710 * parseToken() or end - it does not matter.
2711 *
2712 * @param ch The character under investigation
2713 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2714 * @throws IOException If parsing fails.
2715 */
2716 protected int parseToken( int ch )
2717 throws IOException
2718 {
2719 Element el = null;
2720
2721 //
2722 // Now, check the incoming token.
2723 //
2724 switch( ch )
2725 {
2726 case '\r':
2727 // DOS linefeeds we forget
2728 return IGNORE;
2729
2730 case '\n':
2731 //
2732 // Close things like headings, etc.
2733 //
2734
2735 // FIXME: This is not really very fast
2736
2737 closeHeadings();
2738
2739 popElement("dl"); // Close definition lists.
2740 if( m_istable )
2741 {
2742 popElement("tr");
2743 }
2744
2745 m_isdefinition = false;
2746
2747 if( m_newLine )
2748 {
2749 // Paragraph change.
2750 startBlockLevel();
2751
2752 //
2753 // Figure out which elements cannot be enclosed inside
2754 // a <p></p> pair according to XHTML rules.
2755 //
2756 String nextLine = peekAheadLine();
2757 if( nextLine.length() == 0 ||
2758 (nextLine.length() > 0 &&
2759 !nextLine.startsWith("{{{") &&
2760 !nextLine.startsWith("----") &&
2761 !nextLine.startsWith("%%") &&
2762 "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2763 {
2764 pushElement( new Element("p") );
2765 m_isOpenParagraph = true;
2766
2767 if( m_restartitalic )
2768 {
2769 pushElement( new Element("i") );
2770 m_isitalic = true;
2771 m_restartitalic = false;
2772 }
2773 if( m_restartbold )
2774 {
2775 pushElement( new Element("b") );
2776 m_isbold = true;
2777 m_restartbold = false;
2778 }
2779 }
2780 }
2781 else
2782 {
2783 m_plainTextBuf.append("\n");
2784 m_newLine = true;
2785 }
2786 return IGNORE;
2787
2788
2789 case '\\':
2790 el = handleBackslash();
2791 break;
2792
2793 case '_':
2794 el = handleUnderscore();
2795 break;
2796
2797 case '\'':
2798 el = handleApostrophe();
2799 break;
2800
2801 case '{':
2802 el = handleOpenbrace( m_newLine );
2803 break;
2804
2805 case '}':
2806 el = handleClosebrace();
2807 break;
2808
2809 case '-':
2810 if( m_newLine )
2811 el = handleDash();
2812
2813 break;
2814
2815 case '!':
2816 if( m_newLine )
2817 {
2818 el = handleHeading();
2819 }
2820 break;
2821
2822 case ';':
2823 if( m_newLine )
2824 {
2825 el = handleDefinitionList();
2826 }
2827 break;
2828
2829 case ':':
2830 if( m_isdefinition )
2831 {
2832 popElement("dt");
2833 el = pushElement( new Element("dd") );
2834 m_isdefinition = false;
2835 }
2836 break;
2837
2838 case '[':
2839 el = handleOpenbracket();
2840 break;
2841
2842 case '*':
2843 if( m_newLine )
2844 {
2845 pushBack('*');
2846 el = handleGeneralList();
2847 }
2848 break;
2849
2850 case '#':
2851 if( m_newLine )
2852 {
2853 pushBack('#');
2854 el = handleGeneralList();
2855 }
2856 break;
2857
2858 case '|':
2859 el = handleBar( m_newLine );
2860 break;
2861
2862 case '~':
2863 el = handleTilde();
2864 break;
2865
2866 case '%':
2867 el = handleDiv( m_newLine );
2868 break;
2869
2870 case '/':
2871 el = handleSlash( m_newLine );
2872 break;
2873
2874 default:
2875 break;
2876 }
2877
2878 return el != null ? ELEMENT : CHARACTER;
2879 }
2880
2881 private void closeHeadings()
2882 {
2883 if( m_lastHeading != null && !m_wysiwygEditorMode )
2884 {
2885 // Add the hash anchor element at the end of the heading
2886 addElement( new Element("a").setAttribute( "class","hashlink" ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2887 m_lastHeading = null;
2888 }
2889 popElement("h2");
2890 popElement("h3");
2891 popElement("h4");
2892 }
2893
2894 /**
2895 * Parses the entire document from the Reader given in the constructor or
2896 * set by {@link #setInputReader(Reader)}.
2897 *
2898 * @return A WikiDocument, ready to be passed to the renderer.
2899 * @throws IOException If parsing cannot be accomplished.
2900 */
2901 public WikiDocument parse()
2902 throws IOException
2903 {
2904 WikiDocument d = new WikiDocument( m_context.getPage() );
2905 d.setContext( m_context );
2906
2907 Element rootElement = new Element("domroot");
2908
2909 d.setRootElement( rootElement );
2910
2911 fillBuffer( rootElement );
2912
2913 paragraphify(rootElement);
2914
2915 return d;
2916 }
2917
2918 /**
2919 * Checks out that the first paragraph is correctly installed.
2920 *
2921 * @param rootElement
2922 */
2923 private void paragraphify(Element rootElement)
2924 {
2925 //
2926 // Add the paragraph tag to the first paragraph
2927 //
2928 List kids = rootElement.getContent();
2929
2930 if( rootElement.getChild("p") != null )
2931 {
2932 ArrayList<Content> ls = new ArrayList<Content>();
2933 int idxOfFirstContent = 0;
2934 int count = 0;
2935
2936 for( Iterator i = kids.iterator(); i.hasNext(); count++ )
2937 {
2938 Content c = (Content) i.next();
2939 if( c instanceof Element )
2940 {
2941 String name = ((Element)c).getName();
2942 if( isBlockLevel(name) ) break;
2943 }
2944
2945 if( !(c instanceof ProcessingInstruction) )
2946 {
2947 ls.add( c );
2948 if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2949 }
2950 }
2951
2952 //
2953 // If there were any elements, then add a new <p> (unless it would
2954 // be an empty one)
2955 //
2956 if( ls.size() > 0 )
2957 {
2958 Element newel = new Element("p");
2959
2960 for( Iterator i = ls.iterator(); i.hasNext(); )
2961 {
2962 Content c = (Content) i.next();
2963
2964 c.detach();
2965 newel.addContent(c);
2966 }
2967
2968 //
2969 // Make sure there are no empty <p/> tags added.
2970 //
2971 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2972 rootElement.addContent(idxOfFirstContent, newel);
2973 }
2974 }
2975 }
2976
2977
2978 /**
2979 * Compares two Strings, and if one starts with the other, then
2980 * returns null. Otherwise just like the normal Comparator
2981 * for strings.
2982 *
2983 * @since
2984 */
2985 private static class StartingComparator implements Comparator<String>
2986 {
2987 public int compare( String s1, String s2 )
2988 {
2989 if( s1.length() > s2.length() )
2990 {
2991 if( s1.startsWith(s2) && s2.length() > 1 ) return 0;
2992 }
2993 else
2994 {
2995 if( s2.startsWith(s1) && s1.length() > 1 ) return 0;
2996 }
2997
2998 return s1.compareTo( s2 );
2999 }
3000
3001 }
3002
3003
3004 }
3005