001    /*
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.  
018     */
019    package org.apache.wiki.parser;
020    
021    import java.io.IOException;
022    import java.io.Reader;
023    import java.io.StringReader;
024    import java.text.MessageFormat;
025    import java.util.ArrayList;
026    import java.util.Arrays;
027    import java.util.Collection;
028    import java.util.Collections;
029    import java.util.Comparator;
030    import java.util.EmptyStackException;
031    import java.util.HashMap;
032    import java.util.Iterator;
033    import java.util.List;
034    import java.util.Map;
035    import java.util.Properties;
036    import java.util.ResourceBundle;
037    import java.util.Stack;
038    
039    import javax.xml.transform.Result;
040    
041    import org.apache.commons.lang.StringEscapeUtils;
042    import org.apache.commons.lang.StringUtils;
043    import org.apache.log4j.Logger;
044    import org.apache.oro.text.GlobCompiler;
045    import org.apache.oro.text.regex.MalformedPatternException;
046    import org.apache.oro.text.regex.MatchResult;
047    import org.apache.oro.text.regex.Pattern;
048    import org.apache.oro.text.regex.PatternCompiler;
049    import org.apache.oro.text.regex.PatternMatcher;
050    import org.apache.oro.text.regex.Perl5Compiler;
051    import org.apache.oro.text.regex.Perl5Matcher;
052    import org.apache.wiki.InternalWikiException;
053    import org.apache.wiki.StringTransmutator;
054    import org.apache.wiki.VariableManager;
055    import org.apache.wiki.WikiContext;
056    import org.apache.wiki.WikiPage;
057    import org.apache.wiki.api.exceptions.PluginException;
058    import org.apache.wiki.api.exceptions.ProviderException;
059    import org.apache.wiki.api.plugin.WikiPlugin;
060    import org.apache.wiki.attachment.Attachment;
061    import org.apache.wiki.attachment.AttachmentManager;
062    import org.apache.wiki.auth.WikiSecurityException;
063    import org.apache.wiki.auth.acl.Acl;
064    import org.apache.wiki.i18n.InternationalizationManager;
065    import org.apache.wiki.preferences.Preferences;
066    import org.apache.wiki.render.CleanTextRenderer;
067    import org.apache.wiki.render.RenderingManager;
068    import org.apache.wiki.util.TextUtil;
069    import org.jdom2.Attribute;
070    import org.jdom2.Content;
071    import org.jdom2.Element;
072    import org.jdom2.IllegalDataException;
073    import org.jdom2.ProcessingInstruction;
074    import org.jdom2.Verifier;
075    
076    /**
077     *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
078     *  heart and soul of JSPWiki : make sure you test properly anything that is added,
079     *  or else it breaks down horribly.
080     *
081     *  @since  2.4
082     */
083    public class JSPWikiMarkupParser extends MarkupParser {
084        
085        /** Name of the outlink image; relative path to the JSPWiki directory. */
086        private static final String OUTLINK_IMAGE = "images/out.png";
087    
088        /** The value for anchor element <tt>class</tt> attributes when used
089          * for wiki page (normal) links. The value is "wikipage". */
090        public static final String CLASS_WIKIPAGE = "wikipage";
091    
092        /** The value for anchor element <tt>class</tt> attributes when used
093          * for edit page links. The value is "createpage". */
094        public static final String CLASS_EDITPAGE = "createpage";
095    
096        /** The value for anchor element <tt>class</tt> attributes when used
097          * for interwiki page links. The value is "interwiki". */
098        public static final String CLASS_INTERWIKI = "interwiki";
099    
100        protected static final int              READ          = 0;
101        protected static final int              EDIT          = 1;
102        protected static final int              EMPTY         = 2;  // Empty message
103        protected static final int              LOCAL         = 3;
104        protected static final int              LOCALREF      = 4;
105        protected static final int              IMAGE         = 5;
106        protected static final int              EXTERNAL      = 6;
107        protected static final int              INTERWIKI     = 7;
108        protected static final int              IMAGELINK     = 8;
109        protected static final int              IMAGEWIKILINK = 9;
110        protected static final int              ATTACHMENT    = 10;
111    
112        private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );
113    
114        private boolean        m_isbold       = false;
115        private boolean        m_isitalic     = false;
116        private boolean        m_istable      = false;
117        private boolean        m_isPre        = false;
118        private boolean        m_isEscaping   = false;
119        private boolean        m_isdefinition = false;
120        private boolean        m_isPreBlock   = false;
121    
122        /** Contains style information, in multiple forms. */
123        private Stack<Boolean> m_styleStack   = new Stack<Boolean>();
124    
125         // general list handling
126        private int            m_genlistlevel = 0;
127        private StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
128        private boolean        m_allowPHPWikiStyleLists = true;
129    
130    
131        private boolean        m_isOpenParagraph = false;
132    
133        /** Keeps image regexp Patterns */
134        private List<Pattern>  m_inlineImagePatterns;
135    
136        /** Parser for extended link functionality. */
137        private LinkParser     m_linkParser = new LinkParser();
138    
139        private PatternMatcher m_inlineMatcher = new Perl5Matcher();
140    
141        /** Keeps track of any plain text that gets put in the Text nodes */
142        private StringBuilder  m_plainTextBuf = new StringBuilder(20);
143    
144        private Element        m_currentElement;
145    
146        /** Keep track of duplicate header names.  */
147        private Map<String, Integer>   m_titleSectionCounter = new HashMap<String, Integer>();
148    
149        /** If true, consider CamelCase hyperlinks as well. */
150        public static final String     PROP_CAMELCASELINKS   = "jspwiki.translatorReader.camelCaseLinks";
151    
152        /** If true, all hyperlinks are translated as well, regardless whether they
153            are surrounded by brackets. */
154        public static final String     PROP_PLAINURIS        = "jspwiki.translatorReader.plainUris";
155    
156        /** If true, all outward links (external links) have a small link image appended. */
157        public static final String     PROP_USEOUTLINKIMAGE  = "jspwiki.translatorReader.useOutlinkImage";
158    
159        /** If true, all outward attachment info links have a small link image appended. */
160        public static final String     PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";
161    
162        /** If set to "true", all external links are tagged with 'rel="nofollow"' */
163        public static final String     PROP_USERELNOFOLLOW   = "jspwiki.translatorReader.useRelNofollow";
164    
165        /** If true, then considers CamelCase links as well. */
166        private boolean                m_camelCaseLinks      = false;
167    
168        /** If true, then generate special output for wysiwyg editing in certain cases */
169        private boolean                m_wysiwygEditorMode     = false;
170    
171        /** If true, consider URIs that have no brackets as well. */
172        // FIXME: Currently reserved, but not used.
173        private boolean                m_plainUris           = false;
174    
175        /** If true, all outward links use a small link image. */
176        private boolean                m_useOutlinkImage     = true;
177    
178        private boolean                m_useAttachmentImage  = true;
179    
180        /** If true, allows raw HTML. */
181        private boolean                m_allowHTML           = false;
182    
183        private boolean                m_useRelNofollow      = false;
184    
185        private PatternCompiler        m_compiler = new Perl5Compiler();
186    
187        static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";
188    
189        private PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();
190        private Pattern                m_camelCasePattern;
191    
192        private int                    m_rowNum              = 1;
193    
194        private Heading                m_lastHeading         = null;
195        
/**
 *  All IANA registered URI protocol types as of September 2004, plus a
 *  few well-known extra types.  JSPWiki recognises every one of them as
 *  the start of an external link.
 *  <p>
 *  The array is sorted by the static initializer below, so new entries
 *  may be added in any order.
 */
// NOTE(review): "z39.50s", "z39.50r", "soap.beeps", "xmlrpc.beep",
// "xmlrpc.beeps" and "mtqp" carry no trailing colon, unlike the other
// entries - confirm whether that is intentional.
static final String[] EXTERNAL_LINKS = {
    "http:", "ftp:", "https:", "mailto:",
    "news:", "file:", "rtsp:", "mms:", "ldap:",
    "gopher:", "nntp:", "telnet:", "wais:",
    "prospero:", "z39.50s", "z39.50r", "vemmi:",
    "imap:", "nfs:", "acap:", "tip:", "pop:",
    "dav:", "opaquelocktoken:", "sip:", "sips:",
    "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps",
    "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:",
    "h323:", "ipp:", "tftp:", "mupdate:", "pres:",
    "im:", "mtqp", "smb:"
};

/** Engine attribute under which the compiled inline image patterns are cached. */
private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";

/** Engine attribute under which the compiled CamelCase pattern is cached. */
private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";

/**
 *  Values of the anchor <tt>class</tt> attribute, indexed by link type
 *  (READ ... ATTACHMENT).  createAnchor() looks the value up with the
 *  link type as the index.
 */
private static final String[] CLASS_TYPES =
{
    CLASS_WIKIPAGE,     // READ
    CLASS_EDITPAGE,     // EDIT
    "",                 // EMPTY
    "footnote",         // LOCAL
    "footnoteref",      // LOCALREF
    "",                 // IMAGE
    "external",         // EXTERNAL
    CLASS_INTERWIKI,    // INTERWIKI
    "external",         // IMAGELINK
    CLASS_WIKIPAGE,     // IMAGEWIKILINK
    "attachment"        // ATTACHMENT
};

/**
 *  Comparator handed to the binary search over EXTERNAL_LINKS in
 *  isExternalLink(); it is meant to check whether a link starts with one
 *  of the array entries.
 */
private static Comparator<String> c_startingComparator = new StartingComparator();

static
{
    // isExternalLink() runs a binary search, which needs sorted input.
    Arrays.sort( EXTERNAL_LINKS );
}
247    
/**
 *  Creates a markup parser and configures it from the engine, the page
 *  and the context (see initialize()).
 *
 *  @param context The WikiContext which controls the parsing
 *  @param in Where the data is read from.
 */
public JSPWikiMarkupParser( WikiContext context, Reader in )
{
    super( context, in );

    initialize();
}
259    
260        // FIXME: parsers should be pooled for better performance.
261        @SuppressWarnings("unchecked")
262        private void initialize()
263        {
264            PatternCompiler compiler         = new GlobCompiler();
265            List<Pattern>   compiledpatterns;
266    
267            //
268            //  We cache compiled patterns in the engine, since their creation is
269            //  really expensive
270            //
271            compiledpatterns = (List<Pattern>)m_engine.getAttribute( INLINE_IMAGE_PATTERNS );
272    
273            if( compiledpatterns == null )
274            {
275                compiledpatterns = new ArrayList<Pattern>(20);
276                Collection< String > ptrns = m_engine.getAllInlinedImagePatterns();
277    
278                //
279                //  Make them into Regexp Patterns.  Unknown patterns
280                //  are ignored.
281                //
282                for( Iterator< String > i = ptrns.iterator(); i.hasNext(); )
283                {
284                    try
285                    {
286                        compiledpatterns.add( compiler.compile( i.next(),
287                                                                GlobCompiler.DEFAULT_MASK|GlobCompiler.READ_ONLY_MASK ) );
288                    }
289                    catch( MalformedPatternException e )
290                    {
291                        log.error("Malformed pattern in properties: ", e );
292                    }
293                }
294    
295                m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns );
296            }
297    
298            m_inlineImagePatterns = Collections.unmodifiableList(compiledpatterns);
299    
300            m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN );
301            if( m_camelCasePattern == null )
302            {
303                try
304                {
305                    m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,
306                                                             Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
307                }
308                catch( MalformedPatternException e )
309                {
310                    log.fatal("Internal error: Someone put in a faulty pattern.",e);
311                    throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader");
312                }
313                m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
314            }
315            //
316            //  Set the properties.
317            //
318            Properties props      = m_engine.getWikiProperties();
319    
320            String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
321    
322            if( cclinks != null )
323            {
324                m_camelCaseLinks = TextUtil.isPositive( cclinks );
325            }
326            else
327            {
328                m_camelCaseLinks  = TextUtil.getBooleanProperty( props,
329                                                                 PROP_CAMELCASELINKS,
330                                                                 m_camelCaseLinks );
331            }
332    
333    
334    
335            Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE );
336            if( wysiwygVariable != null )
337            {
338                m_wysiwygEditorMode = wysiwygVariable.booleanValue();
339            }
340    
341            m_plainUris           = getLocalBooleanProperty( m_context,
342                                                             props,
343                                                             PROP_PLAINURIS,
344                                                             m_plainUris );
345            m_useOutlinkImage     = getLocalBooleanProperty( m_context,
346                                                             props,
347                                                             PROP_USEOUTLINKIMAGE,
348                                                             m_useOutlinkImage );
349            m_useAttachmentImage  = getLocalBooleanProperty( m_context,
350                                                             props,
351                                                             PROP_USEATTACHMENTIMAGE,
352                                                             m_useAttachmentImage );
353            m_allowHTML           = getLocalBooleanProperty( m_context,
354                                                             props,
355                                                             MarkupParser.PROP_ALLOWHTML,
356                                                             m_allowHTML );
357    
358            m_useRelNofollow      = getLocalBooleanProperty( m_context,
359                                                             props,
360                                                             PROP_USERELNOFOLLOW,
361                                                             m_useRelNofollow );
362    
363            if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null )
364            {
365                disableAccessRules();
366            }
367    
368            m_context.getPage().setHasMetadata();
369        }
370    
371        /**
372         *  This is just a simple helper method which will first check the context
373         *  if there is already an override in place, and if there is not,
374         *  it will then check the given properties.
375         *
376         *  @param context WikiContext to check first
377         *  @param props   Properties to check next
378         *  @param key     What key are we searching for?
379         *  @param defValue Default value for the boolean
380         *  @return True or false
381         */
382        private static boolean getLocalBooleanProperty( WikiContext context,
383                                                        Properties  props,
384                                                        String      key,
385                                                        boolean     defValue )
386        {
387            Object bool = context.getVariable(key);
388    
389            if( bool != null )
390            {
391                return TextUtil.isPositive( (String) bool );
392            }
393    
394            return TextUtil.getBooleanProperty( props, key, defValue );
395        }
396    
397        /**
398         *  Returns link name, if it exists; otherwise it returns null.
399         */
400        private String linkExists( String page )
401        {
402            try
403            {
404                if( page == null || page.length() == 0 ) return null;
405    
406                return m_engine.getFinalPageName( page );
407            }
408            catch( ProviderException e )
409            {
410                log.warn("TranslatorReader got a faulty page name!",e);
411    
412                return page;  // FIXME: What would be the correct way to go back?
413            }
414        }
415    
416        /**
417         *  Calls a transmutator chain.
418         *
419         *  @param list Chain to call
420         *  @param text Text that should be passed to the mutate() method
421         *              of each of the mutators in the chain.
422         *  @return The result of the mutation.
423         */
424    
425        protected String callMutatorChain( Collection list, String text )
426        {
427            if( list == null || list.size() == 0 )
428            {
429                return text;
430            }
431    
432            for( Iterator i = list.iterator(); i.hasNext(); )
433            {
434                StringTransmutator m = (StringTransmutator) i.next();
435    
436                text = m.mutate( m_context, text );
437            }
438    
439            return text;
440        }
441    
442        /**
443         * Calls the heading listeners.
444         *
445         * @param param A Heading object.
446         */
447        protected void callHeadingListenerChain( Heading param )
448        {
449            List list = m_headingListenerChain;
450    
451            for( Iterator i = list.iterator(); i.hasNext(); )
452            {
453                HeadingListener h = (HeadingListener) i.next();
454    
455                h.headingAdded( m_context, param );
456            }
457        }
458    
459        /**
460         *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
461         *  if you really know what you are doing.
462         *
463         *  @param type One of the types above
464         *  @param link URL to which to link to
465         *  @param text Link text
466         *  @param section If a particular section identifier is required.
467         *  @return An A element.
468         *  @since 2.4.78
469         */
470        protected Element createAnchor(int type, String link, String text, String section)
471        {
472            text = escapeHTMLEntities( text );
473            section = escapeHTMLEntities( section );
474            Element el = new Element("a");
475            el.setAttribute("class",CLASS_TYPES[type]);
476            el.setAttribute("href",link+section);
477            el.addContent(text);
478            return el;
479        }
480    
481        private Element makeLink( int type, String link, String text, String section, Iterator attributes )
482        {
483            Element el = null;
484    
485            if( text == null ) text = link;
486    
487            text = callMutatorChain( m_linkMutators, text );
488    
489            section = (section != null) ? ("#"+section) : "";
490    
491            // Make sure we make a link name that can be accepted
492            // as a valid URL.
493    
494            if( link.length() == 0 )
495            {
496                type = EMPTY;
497            }
498            ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
499    
500            switch(type)
501            {
502                case READ:
503                    el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
504                    break;
505    
506                case EDIT:
507                    el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
508                    el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
509                    break;
510    
511                case EMPTY:
512                    el = new Element("u").addContent(text);
513                    break;
514    
515                    //
516                    //  These two are for local references - footnotes and
517                    //  references to footnotes.
518                    //  We embed the page name (or whatever WikiContext gives us)
519                    //  to make sure the links are unique across Wiki.
520                    //
521                case LOCALREF:
522                    el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
523                    break;
524    
525                case LOCAL:
526                    el = new Element("a").setAttribute("class","footnote");
527                    el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
528                    el.addContent("["+text+"]");
529                    break;
530    
531                    //
532                    //  With the image, external and interwiki types we need to
533                    //  make sure nobody can put in Javascript or something else
534                    //  annoying into the links themselves.  We do this by preventing
535                    //  a haxor from stopping the link name short with quotes in
536                    //  fillBuffer().
537                    //
538                case IMAGE:
539                    el = new Element("img").setAttribute("class","inline");
540                    el.setAttribute("src",link);
541                    el.setAttribute("alt",text);
542                    break;
543    
544                case IMAGELINK:
545                    el = new Element("img").setAttribute("class","inline");
546                    el.setAttribute("src",link);
547                    el.setAttribute("alt",text);
548                    el = createAnchor(IMAGELINK,text,"","").addContent(el);
549                    break;
550    
551                case IMAGEWIKILINK:
552                    String pagelink = m_context.getURL(WikiContext.VIEW,text);
553                    el = new Element("img").setAttribute("class","inline");
554                    el.setAttribute("src",link);
555                    el.setAttribute("alt",text);
556                    el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
557                    break;
558    
559                case EXTERNAL:
560                    el = createAnchor( EXTERNAL, link, text, section );
561                    if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
562                    break;
563    
564                case INTERWIKI:
565                    el = createAnchor( INTERWIKI, link, text, section );
566                    break;
567    
568                case ATTACHMENT:
569                    String attlink = m_context.getURL( WikiContext.ATTACH,
570                                                       link );
571    
572                    String infolink = m_context.getURL( WikiContext.INFO,
573                                                        link );
574    
575                    String imglink = m_context.getURL( WikiContext.NONE,
576                                                       "images/attachment_small.png" );
577    
578                    el = createAnchor( ATTACHMENT, attlink, text, "" );
579    
580                    pushElement(el);
581                    popElement(el.getName());
582    
583                    if( m_useAttachmentImage )
584                    {
585                        el = new Element("img").setAttribute("src",imglink);
586                        el.setAttribute("border","0");
587                        el.setAttribute("alt","(info)");
588    
589                        el = new Element("a").setAttribute("href",infolink).addContent(el);
590                        el.setAttribute("class","infolink");
591                    }
592                    else
593                    {
594                        el = null;
595                    }
596                    break;
597    
598                default:
599                    break;
600            }
601    
602            if( el != null && attributes != null )
603            {
604                while( attributes.hasNext() )
605                {
606                    Attribute attr = (Attribute)attributes.next();
607                    if( attr != null )
608                    {
609                        el.setAttribute(attr);
610                    }
611                }
612            }
613    
614            if( el != null )
615            {
616                flushPlainText();
617                m_currentElement.addContent( el );
618            }
619            return el;
620        }
621    
622    
623        /**
624         *  Figures out if a link is an off-site link.  This recognizes
625         *  the most common protocols by checking how it starts.
626         *
627         *  @param link The link to check.
628         *  @return true, if this is a link outside of this wiki.
629         *  @since 2.4
630         */
631    
632        public static boolean isExternalLink( String link )
633        {
634            int idx = Arrays.binarySearch( EXTERNAL_LINKS, link,
635                                           c_startingComparator );
636    
637            //
638            //  We need to check here once again; otherwise we might
639            //  get a match for something like "h".
640            //
641            if( idx >= 0 && link.startsWith(EXTERNAL_LINKS[idx]) ) return true;
642    
643            return false;
644        }
645    
646        /**
647         *  Returns true, if the link in question is an access
648         *  rule.
649         */
650        private static boolean isAccessRule( String link )
651        {
652            return link.startsWith("{ALLOW") || link.startsWith("{DENY");
653        }
654    
655        /**
656         *  Returns true if the link is really command to insert
657         *  a plugin.
658         *  <P>
659         *  Currently we just check if the link starts with "{INSERT",
660         *  or just plain "{" but not "{$".
661         *
662         *  @param link Link text, i.e. the contents of text between [].
663         *  @return True, if this link seems to be a command to insert a plugin here.
664         */
665        public static boolean isPluginLink( String link )
666        {
667            return link.startsWith( "{INSERT" ) ||
668                   ( link.startsWith( "{" ) && !link.startsWith( "{$" ) );
669        }
670    
671        /**
672         *  Matches the given link to the list of image name patterns
673         *  to determine whether it should be treated as an inline image
674         *  or not.
675         */
676        private boolean isImageLink( String link )
677        {
678            if( m_inlineImages )
679            {
680                link = link.toLowerCase();
681    
682                for( Iterator i = m_inlineImagePatterns.iterator(); i.hasNext(); )
683                {
684                    if( m_inlineMatcher.matches( link, (Pattern) i.next() ) )
685                        return true;
686                }
687            }
688    
689            return false;
690        }
691    
692        private static boolean isMetadata( String link )
693        {
694            return link.startsWith("{SET");
695        }
696    
697        /**
698         *  These are all of the HTML 4.01 block-level elements.
699         */
700        private static final String[] BLOCK_ELEMENTS = {
701            "address", "blockquote", "div", "dl", "fieldset", "form",
702            "h1", "h2", "h3", "h4", "h5", "h6",
703            "hr", "noscript", "ol", "p", "pre", "table", "ul"
704        };
705    
706        private static boolean isBlockLevel( String name )
707        {
708            return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
709        }
710    
711        /**
712         *  This method peeks ahead in the stream until EOL and returns the result.
713         *  It will keep the buffers untouched.
714         *
715         *  @return The string from the current position to the end of line.
716         */
717    
718        // FIXME: Always returns an empty line, even if the stream is full.
719        private String peekAheadLine()
720            throws IOException
721        {
722            String s = readUntilEOL().toString();
723    
724            if( s.length() > PUSHBACK_BUFFER_SIZE )
725            {
726                log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
727                pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
728            }
729            else
730            {
731                try
732                {
733                    pushBack( s );
734                }
735                catch( IOException e )
736                {
737                    log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
738                }
739            }
740            return s;
741        }
742    
743    
744        /**
745         *  Writes HTML for error message.  Does not add it to the document, you
746         *  have to do it yourself.
747         *  
748         *  @param error The error string.
749         *  @return An Element containing the error.
750         */
751    
752        public static Element makeError( String error )
753        {
754            return new Element("span").setAttribute("class","error").addContent(error);
755        }
756    
757        private int flushPlainText()
758        {
759            int numChars = m_plainTextBuf.length();
760    
761            if( numChars > 0 )
762            {
763                String buf;
764    
765                if( !m_allowHTML )
766                {
767                    buf = escapeHTMLEntities(m_plainTextBuf.toString());
768                }
769                else
770                {
771                    buf = m_plainTextBuf.toString();
772                }
773                //
774                //  We must first empty the buffer because the side effect of
775                //  calling makeCamelCaseLink() is to call this routine.
776                //
777    
778                m_plainTextBuf = new StringBuilder(20);
779    
780                try
781                {
782                    //
783                    //  This is the heaviest part of parsing, and therefore we can
784                    //  do some optimization here.
785                    //
786                    //  1) Only when the length of the buffer is big enough, we try to do the match
787                    //
788    
789                    if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
790                    {
791                        // System.out.println("Buffer="+buf);
792    
793                        while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
794                        {
795                            MatchResult result = m_camelCaseMatcher.getMatch();
796    
797                            String firstPart = buf.substring(0,result.beginOffset(0));
798                            String prefix = result.group(1);
799    
800                            if( prefix == null ) prefix = "";
801    
802                            String camelCase = result.group(2);
803                            String protocol  = result.group(3);
804                            String uri       = protocol+result.group(4);
805                            buf              = buf.substring(result.endOffset(0));
806    
807                            m_currentElement.addContent( firstPart );
808    
809                            //
810                            //  Check if the user does not wish to do URL or WikiWord expansion
811                            //
812                            if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
813                            {
814                                if( prefix.endsWith("~") )
815                                {
816                                    if( m_wysiwygEditorMode )
817                                    {
818                                        m_currentElement.addContent( "~" );
819                                    }
820                                    prefix = prefix.substring(0,prefix.length()-1);
821                                }
822                                if( camelCase != null )
823                                {
824                                    m_currentElement.addContent( prefix+camelCase );
825                                }
826                                else if( protocol != null )
827                                {
828                                    m_currentElement.addContent( prefix+uri );
829                                }
830                                continue;
831                            }
832    
833                            //
834                            //  Fine, then let's check what kind of a link this was
835                            //  and emit the proper elements
836                            //
837                            if( protocol != null )
838                            {
839                                char c = uri.charAt(uri.length()-1);
840                                if( c == '.' || c == ',' )
841                                {
842                                    uri = uri.substring(0,uri.length()-1);
843                                    buf = c + buf;
844                                }
845                                // System.out.println("URI match "+uri);
846                                m_currentElement.addContent( prefix );
847                                makeDirectURILink( uri );
848                            }
849                            else
850                            {
851                                // System.out.println("Matched: '"+camelCase+"'");
852                                // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
853                                // System.out.println("prefix="+prefix);
854                                m_currentElement.addContent( prefix );
855    
856                                makeCamelCaseLink( camelCase );
857                            }
858                        }
859    
860                        m_currentElement.addContent( buf );
861                    }
862                    else
863                    {
864                        //
865                        //  No camelcase asked for, just add the elements
866                        //
867                        m_currentElement.addContent( buf );
868                    }
869                }
870                catch( IllegalDataException e )
871                {
872                    //
873                    // Sometimes it's possible that illegal XML chars is added to the data.
874                    // Here we make sure it does not stop parsing.
875                    //
876                    m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
877                }
878            }
879    
880            return numChars;
881        }
882    
883        /**
884         *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
885         *  entities that are already escaped).
886         *
887         *  @param buf
888         *  @return An escaped string.
889         */
890        private String escapeHTMLEntities(String buf)
891        {
892            StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
893    
894            for( int i = 0; i < buf.length(); i++ )
895            {
896                char ch = buf.charAt(i);
897    
898                if( ch == '<' )
899                {
900                    tmpBuf.append("&lt;");
901                }
902                else if( ch == '>' )
903                {
904                    tmpBuf.append("&gt;");
905                }
906                else if( ch == '\"' )
907                {
908                    tmpBuf.append("&quot;");
909                }
910                else if( ch == '&' )
911                {
912                    //
913                    //  If the following is an XML entity reference (&#.*;) we'll
914                    //  leave it as it is; otherwise we'll replace it with an &amp;
915                    //
916                    
917                    boolean isEntity = false;
918                    StringBuilder entityBuf = new StringBuilder();
919                    
920                    if( i < buf.length() -1 )
921                    {
922                        for( int j = i; j < buf.length(); j++ )
923                        {
924                            char ch2 = buf.charAt(j);
925                            
926                            if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
927                            {
928                                entityBuf.append(ch2);
929                                
930                                if( ch2 == ';' )
931                                {
932                                    isEntity = true;
933                                    break;
934                                }
935                            }
936                            else
937                            {
938                                break;
939                            }
940                        }
941                    }
942                    
943                    if( isEntity ) 
944                    {
945                        tmpBuf.append( entityBuf );
946                        i = i + entityBuf.length() - 1;
947                    }
948                    else 
949                    {
950                        tmpBuf.append("&amp;");
951                    }
952                    
953                }
954                else
955                {
956                    tmpBuf.append( ch );
957                }
958            }
959    
960            return tmpBuf.toString();
961        }
962    
963        private Element pushElement( Element e )
964        {
965            flushPlainText();
966            m_currentElement.addContent( e );
967            m_currentElement = e;
968    
969            return e;
970        }
971    
972        private Element addElement( Content e )
973        {
974            if( e != null )
975            {
976                flushPlainText();
977                m_currentElement.addContent( e );
978            }
979            return m_currentElement;
980        }
981    
982        /**
983         *  All elements that can be empty by the HTML DTD.
984         */
985        //  Keep sorted.
986        private static final String[] EMPTY_ELEMENTS = {
987            "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
988        };
989    
990        /**
991         *  Goes through the current element stack and pops all elements until this
992         *  element is found - this essentially "closes" and element.
993         *
994         *  @param s
995         *  @return The new current element, or null, if there was no such element in the entire stack.
996         */
997        private Element popElement( String s )
998        {
999            int flushedBytes = flushPlainText();
1000    
1001            Element currEl = m_currentElement;
1002    
1003            while( currEl.getParentElement() != null )
1004            {
1005                if( currEl.getName().equals(s) && !currEl.isRootElement() )
1006                {
1007                    m_currentElement = currEl.getParentElement();
1008    
1009                    //
1010                    //  Check if it's okay for this element to be empty.  Then we will
1011                    //  trick the JDOM generator into not generating an empty element,
1012                    //  by putting an empty string between the tags.  Yes, it's a kludge
1013                    //  but what'cha gonna do about it. :-)
1014                    //
1015    
1016                    if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
1017                    {
1018                        currEl.addContent("");
1019                    }
1020    
1021                    return m_currentElement;
1022                }
1023    
1024                currEl = currEl.getParentElement();
1025            }
1026    
1027            return null;
1028        }
1029    
1030    
1031        /**
1032         *  Reads the stream until it meets one of the specified
1033         *  ending characters, or stream end.  The ending character will be left
1034         *  in the stream.
1035         */
1036        private String readUntil( String endChars )
1037            throws IOException
1038        {
1039            StringBuilder sb = new StringBuilder( 80 );
1040            int ch = nextToken();
1041    
1042            while( ch != -1 )
1043            {
1044                if( ch == '\\' )
1045                {
1046                    ch = nextToken();
1047                    if( ch == -1 )
1048                    {
1049                        break;
1050                    }
1051                }
1052                else
1053                {
1054                    if( endChars.indexOf((char)ch) != -1 )
1055                    {
1056                        pushBack( ch );
1057                        break;
1058                    }
1059                }
1060                sb.append( (char) ch );
1061                ch = nextToken();
1062            }
1063    
1064            return sb.toString();
1065        }
1066    
1067        /**
1068         *  Reads the stream while the characters that have been specified are
1069         *  in the stream, returning then the result as a String.
1070         */
1071        private String readWhile( String endChars )
1072            throws IOException
1073        {
1074            StringBuilder sb = new StringBuilder( 80 );
1075            int ch = nextToken();
1076    
1077            while( ch != -1 )
1078            {
1079                if( endChars.indexOf((char)ch) == -1 )
1080                {
1081                    pushBack( ch );
1082                    break;
1083                }
1084    
1085                sb.append( (char) ch );
1086                ch = nextToken();
1087            }
1088    
1089            return sb.toString();
1090        }
1091    
1092        private JSPWikiMarkupParser m_cleanTranslator;
1093    
1094        /**
1095         *  Does a lazy init.  Otherwise, we would get into a situation
1096         *  where HTMLRenderer would try and boot a TranslatorReader before
1097         *  the TranslatorReader it is contained by is up.
1098         */
1099        private JSPWikiMarkupParser getCleanTranslator()
1100        {
1101            if( m_cleanTranslator == null )
1102            {
1103                WikiContext dummyContext = new WikiContext( m_engine,
1104                                                            m_context.getHttpRequest(),
1105                                                            m_context.getPage() );
1106                m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
1107    
1108                m_cleanTranslator.m_allowHTML = true;
1109            }
1110    
1111            return m_cleanTranslator;
1112        }
1113        /**
1114         *  Modifies the "hd" parameter to contain proper values.  Because
1115         *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
1116         *  % after url encoding with '_'.
1117         *  <p>
1118         *  Counts also duplicate headings (= headings with similar name), and
1119         *  attaches a counter.
1120         */
1121        private String makeHeadingAnchor( String baseName, String title, Heading hd )
1122        {
1123            hd.m_titleText = title;
1124            title = MarkupParser.wikifyLink( title );
1125    
1126            hd.m_titleSection = m_engine.encodeName(title);
1127            
1128            if( m_titleSectionCounter.containsKey( hd.m_titleSection ) )
1129            {
1130                Integer count = m_titleSectionCounter.get( hd.m_titleSection );
1131                count = count + 1;
1132                m_titleSectionCounter.put( hd.m_titleSection, count );
1133                hd.m_titleSection += "-" + count;
1134            }
1135            else
1136            {
1137                m_titleSectionCounter.put( hd.m_titleSection, 1 );
1138            }
1139    
1140            hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+
1141                               "-"+hd.m_titleSection;
1142            hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
1143            hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
1144            
1145            return hd.m_titleAnchor;
1146        }
1147    
1148        private String makeSectionTitle( String title )
1149        {
1150            title = title.trim();
1151            String outTitle;
1152    
1153            try
1154            {
1155                JSPWikiMarkupParser dtr = getCleanTranslator();
1156                dtr.setInputReader( new StringReader(title) );
1157    
1158                CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse());
1159    
1160                outTitle = ctt.getString();
1161            }
1162            catch( IOException e )
1163            {
1164                log.fatal("CleanTranslator not working", e);
1165                throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() );
1166            }
1167    
1168            return outTitle;
1169        }
1170    
1171        /**
1172         *  Returns XHTML for the heading. 
1173         *  
1174         *  @param level The level of the heading.  @see Heading
1175         *  @param title the title for the heading
1176         *  @param hd a List to which heading should be added
1177         *  @return An Element containing the heading
1178         */
1179        public Element makeHeading( int level, String title, Heading hd )
1180        {
1181            Element el = null;
1182    
1183            String pageName = m_context.getPage().getName();
1184    
1185            String outTitle = makeSectionTitle( title );
1186    
1187            hd.m_level = level;
1188    
1189            switch( level )
1190            {
1191              case Heading.HEADING_SMALL:
1192                el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1193                break;
1194    
1195              case Heading.HEADING_MEDIUM:
1196                el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1197                break;
1198    
1199              case Heading.HEADING_LARGE:
1200                el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1201                break;
1202    
1203              default:
1204                throw new InternalWikiException("Illegal heading type "+level);
1205            }
1206    
1207            
1208            return el;
1209        }
1210    
1211        /**
1212         *  When given a link to a WikiName, we just return
1213         *  a proper HTML link for it.  The local link mutator
1214         *  chain is also called.
1215         */
1216        private Element makeCamelCaseLink( String wikiname )
1217        {
1218            String matchedLink;
1219    
1220            callMutatorChain( m_localLinkMutatorChain, wikiname );
1221    
1222            if( (matchedLink = linkExists( wikiname )) != null )
1223            {
1224                makeLink( READ, matchedLink, wikiname, null, null );
1225            }
1226            else
1227            {
1228                makeLink( EDIT, wikiname, wikiname, null, null );
1229            }
1230    
1231            return m_currentElement;
1232        }
1233    
1234        /** Holds the image URL for the duration of this parser */
1235        private String m_outlinkImageURL = null;
1236    
1237        /**
1238         *  Returns an element for the external link image (out.png).  However,
1239         *  this method caches the URL for the lifetime of this MarkupParser,
1240         *  because it's commonly used, and we'll end up with possibly hundreds
1241         *  our thousands of references to it...  It's a lot faster, too.
1242         *
1243         *  @return  An element containing the HTML for the outlink image.
1244         */
1245        private Element outlinkImage()
1246        {
1247            Element el = null;
1248    
1249            if( m_useOutlinkImage )
1250            {
1251                if( m_outlinkImageURL == null )
1252                {
1253                    m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE );
1254                }
1255    
1256                el = new Element("img").setAttribute("class", "outlink");
1257                el.setAttribute( "src", m_outlinkImageURL );
1258                el.setAttribute("alt","");
1259            }
1260    
1261            return el;
1262        }
1263    
1264        /**
1265         *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
1266         *  because of the way that flushPlainText() works, it already encodes
1267         *  all of the XML entities.  But so does WikiContext.getURL(), so we
1268         *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
1269         *  <p>
1270         *  What a crappy problem.
1271         *
1272         * @param url
1273         * @return An anchor Element containing the link.
1274         */
1275        private Element makeDirectURILink( String url )
1276        {
1277            Element result;
1278            String last = null;
1279    
1280            if( url.endsWith(",") || url.endsWith(".") )
1281            {
1282                last = url.substring( url.length()-1 );
1283                url  = url.substring( 0, url.length()-1 );
1284            }
1285    
1286            callMutatorChain( m_externalLinkMutatorChain, url );
1287    
1288            if( isImageLink( url ) )
1289            {
1290                result = handleImageLink( StringUtils.replace(url,"&amp;","&"), url, false );
1291            }
1292            else
1293            {
1294                result = makeLink( EXTERNAL, StringUtils.replace(url,"&amp;","&"), url, null, null );
1295                addElement( outlinkImage() );
1296            }
1297    
1298            if( last != null )
1299            {
1300                m_plainTextBuf.append(last);
1301            }
1302    
1303            return result;
1304        }
1305    
1306        /**
1307         *  Image links are handled differently:
1308         *  1. If the text is a WikiName of an existing page,
1309         *     it gets linked.
1310         *  2. If the text is an external link, then it is inlined.
1311         *  3. Otherwise it becomes an ALT text.
1312         *
1313         *  @param reallink The link to the image.
1314         *  @param link     Link text portion, may be a link to somewhere else.
1315         *  @param hasLinkText If true, then the defined link had a link text available.
1316         *                  This means that the link text may be a link to a wiki page,
1317         *                  or an external resource.
1318         */
1319    
1320        // FIXME: isExternalLink() is called twice.
1321        private Element handleImageLink( String reallink, String link, boolean hasLinkText )
1322        {
1323            String possiblePage = MarkupParser.cleanLink( link );
1324    
1325            if( isExternalLink( link ) && hasLinkText )
1326            {
1327                return makeLink( IMAGELINK, reallink, link, null, null );
1328            }
1329            else if( ( linkExists( possiblePage ) ) != null &&
1330                     hasLinkText )
1331            {
1332                // System.out.println("Orig="+link+", Matched: "+matchedLink);
1333                callMutatorChain( m_localLinkMutatorChain, possiblePage );
1334    
1335                return makeLink( IMAGEWIKILINK, reallink, link, null, null );
1336            }
1337            else
1338            {
1339                return makeLink( IMAGE, reallink, link, null, null );
1340            }
1341        }
1342    
1343        private Element handleAccessRule( String ruleLine )
1344        {
1345            if( m_wysiwygEditorMode )
1346            {
1347                m_currentElement.addContent( "[" + ruleLine + "]" );
1348            }
1349    
1350            if( !m_parseAccessRules ) return m_currentElement;
1351            Acl acl;
1352            WikiPage          page = m_context.getRealPage();
1353            // UserDatabase      db = m_context.getEngine().getUserDatabase();
1354    
1355            if( ruleLine.startsWith( "{" ) )
1356                ruleLine = ruleLine.substring( 1 );
1357            if( ruleLine.endsWith( "}" ) )
1358                ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1359    
1360            if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine);
1361    
1362            try
1363            {
1364                acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1365    
1366                page.setAcl( acl );
1367    
1368                if( log.isDebugEnabled() ) log.debug( acl.toString() );
1369            }
1370            catch( WikiSecurityException wse )
1371            {
1372                return makeError( wse.getMessage() );
1373            }
1374    
1375            return m_currentElement;
1376        }
1377    
1378        /**
1379         *  Handles metadata setting [{SET foo=bar}]
1380         */
1381        private Element handleMetadata( String link )
1382        {
1383            if( m_wysiwygEditorMode )
1384            {
1385                m_currentElement.addContent( "[" + link + "]" );
1386            }
1387    
1388            try
1389            {
1390                String args = link.substring( link.indexOf(' '), link.length()-1 );
1391    
1392                String name = args.substring( 0, args.indexOf('=') );
1393                String val  = args.substring( args.indexOf('=')+1, args.length() );
1394    
1395                name = name.trim();
1396                val  = val.trim();
1397    
1398                if( val.startsWith("'") ) val = val.substring( 1 );
1399                if( val.endsWith("'") )   val = val.substring( 0, val.length()-1 );
1400    
1401                // log.debug("SET name='"+name+"', value='"+val+"'.");
1402    
1403                if( name.length() > 0 && val.length() > 0 )
1404                {
1405                    val = m_engine.getVariableManager().expandVariables( m_context,
1406                                                                         val );
1407    
1408                    m_context.getPage().setAttribute( name, val );
1409                }
1410            }
1411            catch( Exception e )
1412            {
1413                ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1414                return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1415            }
1416    
1417            return m_currentElement;
1418        }
1419    
1420        /**
1421         *  Emits a processing instruction that will disable markup escaping. This is
1422         *  very useful if you want to emit HTML directly into the stream.
1423         *
1424         */
1425        private void disableOutputEscaping()
1426        {
1427            addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") );
1428        }
1429        
1430        /**
1431         *  Gobbles up all hyperlinks that are encased in square brackets.
1432         */
1433        private Element handleHyperlinks( String linktext, int pos )
1434        {
1435            ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1436            
1437            StringBuilder sb = new StringBuilder(linktext.length()+80);
1438    
1439            if( isAccessRule( linktext ) )
1440            {
1441                return handleAccessRule( linktext );
1442            }
1443    
1444            if( isMetadata( linktext ) )
1445            {
1446                return handleMetadata( linktext );
1447            }
1448    
1449            if( isPluginLink( linktext ) )
1450            {
1451                try
1452                {
1453                    PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1454                    //
1455                    //  This might sometimes fail, especially if there is something which looks
1456                    //  like a plugin invocation but is really not.
1457                    //
1458                    if( pluginContent != null )
1459                    {
1460                        addElement( pluginContent );
1461    
1462                        pluginContent.executeParse( m_context );
1463                    }
1464                }
1465                catch( PluginException e )
1466                {
1467                    log.info( "Failed to insert plugin: "+e.getMessage() );
1468                    //log.info( "Root cause:",e.getRootThrowable() );
1469                    if( !m_wysiwygEditorMode )
1470                    {
1471                        ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE );
1472                        return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), e.getMessage() ) ) );
1473                    }
1474                }
1475    
1476                return m_currentElement;
1477            }
1478    
1479            try
1480            {
1481                LinkParser.Link link = m_linkParser.parse(linktext);
1482                linktext       = link.getText();
1483                String linkref = link.getReference();
1484    
1485                //
1486                //  Yes, we now have the components separated.
1487                //  linktext = the text the link should have
1488                //  linkref  = the url or page name.
1489                //
1490                //  In many cases these are the same.  [linktext|linkref].
1491                //
1492                if( VariableManager.isVariableLink( linktext ) )
1493                {
1494                    Content el = new VariableContent(linktext);
1495    
1496                    addElement( el );
1497                }
1498                else if( isExternalLink( linkref ) )
1499                {
1500                    // It's an external link, out of this Wiki
1501    
1502                    callMutatorChain( m_externalLinkMutatorChain, linkref );
1503    
1504                    if( isImageLink( linkref ) )
1505                    {
1506                        handleImageLink( linkref, linktext, link.hasReference() );
1507                    }
1508                    else
1509                    {
1510                        makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1511                        addElement( outlinkImage() );
1512                    }
1513                }
1514                else if( link.isInterwikiLink() )
1515                {
1516                    // It's an interwiki link
1517                    // InterWiki links also get added to external link chain
1518                    // after the links have been resolved.
1519    
1520                    // FIXME: There is an interesting issue here:  We probably should
1521                    //        URLEncode the wikiPage, but we can't since some of the
1522                    //        Wikis use slashes (/), which won't survive URLEncoding.
1523                    //        Besides, we don't know which character set the other Wiki
1524                    //        is using, so you'll have to write the entire name as it appears
1525                    //        in the URL.  Bugger.
1526    
1527                    String extWiki  = link.getExternalWiki();
1528                    String wikiPage = link.getExternalWikiPage();
1529    
1530                    if( m_wysiwygEditorMode )
1531                    {
1532                        makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1533                    }
1534                    else
1535                    {
1536                        String urlReference = m_engine.getInterWikiURL( extWiki );
1537    
1538                        if( urlReference != null )
1539                        {
1540                            urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1541                            urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1542    
1543                            if( isImageLink(urlReference) )
1544                            {
1545                                handleImageLink( urlReference, linktext, link.hasReference() );
1546                            }
1547                            else
1548                            {
1549                                makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1550                            }
1551    
1552                            if( isExternalLink(urlReference) )
1553                            {
1554                                addElement( outlinkImage() );
1555                            }
1556                        }
1557                        else
1558                        {
1559                            Object[] args = { extWiki };
1560                            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1561                        }
1562                    }
1563                }
1564                else if( linkref.startsWith("#") )
1565                {
1566                    // It defines a local footnote
1567                    makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1568                }
1569                else if( TextUtil.isNumber( linkref ) )
1570                {
1571                    // It defines a reference to a local footnote
1572                    makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1573                }
1574                else
1575                {
1576                    int hashMark = -1;
1577    
1578                    //
1579                    //  Internal wiki link, but is it an attachment link?
1580                    //
1581                    String attachment = findAttachment( linkref );
1582                    if( attachment != null )
1583                    {
1584                        callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1585    
1586                        if( isImageLink( linkref ) )
1587                        {
1588                            attachment = m_context.getURL( WikiContext.ATTACH, attachment );
1589                            sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1590                        }
1591                        else
1592                        {
1593                            makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1594                        }
1595                    }
1596                    else if( (hashMark = linkref.indexOf('#')) != -1 )
1597                    {
1598                        // It's an internal Wiki link, but to a named section
1599    
1600                        String namedSection = linkref.substring( hashMark+1 );
1601                        linkref = linkref.substring( 0, hashMark );
1602    
1603                        linkref = MarkupParser.cleanLink( linkref );
1604    
1605                        callMutatorChain( m_localLinkMutatorChain, linkref );
1606    
1607                        String matchedLink;
1608                        if( (matchedLink = linkExists( linkref )) != null )
1609                        {
1610                            String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection));
1611                            sectref = sectref.replace('%', '_');
1612                            makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1613                        }
1614                        else
1615                        {
1616                            makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1617                        }
1618                    }
1619                    else
1620                    {
1621                        // It's an internal Wiki link
1622                        linkref = MarkupParser.cleanLink( linkref );
1623    
1624                        callMutatorChain( m_localLinkMutatorChain, linkref );
1625    
1626                        String matchedLink = linkExists( linkref );
1627    
1628                        if( matchedLink != null )
1629                        {
1630                            makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1631                        }
1632                        else
1633                        {
1634                            makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1635                        }
1636                    }
1637                }
1638            }
1639            catch( ParseException e )
1640            {
1641                log.info("Parser failure: ",e);
1642                Object[] args = { e.getMessage() };
1643                addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1644            }
1645    
1646            return m_currentElement;
1647        }
1648    
1649        private String findAttachment( String linktext )
1650        {
1651            AttachmentManager mgr = m_engine.getAttachmentManager();
1652            Attachment att = null;
1653    
1654            try
1655            {
1656                att = mgr.getAttachmentInfo( m_context, linktext );
1657            }
1658            catch( ProviderException e )
1659            {
1660                log.warn("Finding attachments failed: ",e);
1661                return null;
1662            }
1663    
1664            if( att != null )
1665            {
1666                return att.getName();
1667            }
1668            else if( linktext.indexOf('/') != -1 )
1669            {
1670                return linktext;
1671            }
1672    
1673            return null;
1674        }
1675    
1676        /**
1677         *  Pushes back any string that has been read.  It will obviously
1678         *  be pushed back in a reverse order.
1679         *
1680         *  @since 2.1.77
1681         */
1682        private void pushBack( String s )
1683            throws IOException
1684        {
1685            for( int i = s.length()-1; i >= 0; i-- )
1686            {
1687                pushBack( s.charAt(i) );
1688            }
1689        }
1690    
1691        private Element handleBackslash()
1692            throws IOException
1693        {
1694            int ch = nextToken();
1695    
1696            if( ch == '\\' )
1697            {
1698                int ch2 = nextToken();
1699    
1700                if( ch2 == '\\' )
1701                {
1702                    pushElement( new Element("br").setAttribute("clear","all"));
1703                    return popElement("br");
1704                }
1705    
1706                pushBack( ch2 );
1707    
1708                pushElement( new Element("br") );
1709                return popElement("br");
1710            }
1711    
1712            pushBack( ch );
1713    
1714            return null;
1715        }
1716    
1717        private Element handleUnderscore()
1718            throws IOException
1719        {
1720            int ch = nextToken();
1721            Element el = null;
1722    
1723            if( ch == '_' )
1724            {
1725                if( m_isbold )
1726                {
1727                    el = popElement("b");
1728                }
1729                else
1730                {
1731                    el = pushElement( new Element("b") );
1732                }
1733                m_isbold = !m_isbold;
1734            }
1735            else
1736            {
1737                pushBack( ch );
1738            }
1739    
1740            return el;
1741        }
1742    
1743    
1744        /**
1745         *  For example: italics.
1746         */
1747        private Element handleApostrophe()
1748            throws IOException
1749        {
1750            int ch = nextToken();
1751            Element el = null;
1752    
1753            if( ch == '\'' )
1754            {
1755                if( m_isitalic )
1756                {
1757                    el = popElement("i");
1758                }
1759                else
1760                {
1761                    el = pushElement( new Element("i") );
1762                }
1763                m_isitalic = !m_isitalic;
1764            }
1765            else
1766            {
1767                pushBack( ch );
1768            }
1769    
1770            return el;
1771        }
1772    
1773        private Element handleOpenbrace( boolean isBlock )
1774            throws IOException
1775        {
1776            int ch = nextToken();
1777    
1778            if( ch == '{' )
1779            {
1780                int ch2 = nextToken();
1781    
1782                if( ch2 == '{' )
1783                {
1784                    m_isPre = true;
1785                    m_isEscaping = true;
1786                    m_isPreBlock = isBlock;
1787    
1788                    if( isBlock )
1789                    {
1790                        startBlockLevel();
1791                        return pushElement( new Element("pre") );
1792                    }
1793    
1794                    return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") );
1795                }
1796    
1797                pushBack( ch2 );
1798    
1799                return pushElement( new Element("tt") );
1800            }
1801    
1802            pushBack( ch );
1803    
1804            return null;
1805        }
1806    
1807        /**
1808         *  Handles both }} and }}}
1809         */
1810        private Element handleClosebrace()
1811            throws IOException
1812        {
1813            int ch2 = nextToken();
1814    
1815            if( ch2 == '}' )
1816            {
1817                int ch3 = nextToken();
1818    
1819                if( ch3 == '}' )
1820                {
1821                    if( m_isPre )
1822                    {
1823                        if( m_isPreBlock )
1824                        {
1825                            popElement( "pre" );
1826                        }
1827                        else
1828                        {
1829                            popElement( "span" );
1830                        }
1831    
1832                        m_isPre = false;
1833                        m_isEscaping = false;
1834                        return m_currentElement;
1835                    }
1836    
1837                    m_plainTextBuf.append("}}}");
1838                    return m_currentElement;
1839                }
1840    
1841                pushBack( ch3 );
1842    
1843                if( !m_isEscaping )
1844                {
1845                    return popElement("tt");
1846                }
1847            }
1848    
1849            pushBack( ch2 );
1850    
1851            return null;
1852        }
1853    
1854        private Element handleDash()
1855            throws IOException
1856        {
1857            int ch = nextToken();
1858    
1859            if( ch == '-' )
1860            {
1861                int ch2 = nextToken();
1862    
1863                if( ch2 == '-' )
1864                {
1865                    int ch3 = nextToken();
1866    
1867                    if( ch3 == '-' )
1868                    {
1869                        // Empty away all the rest of the dashes.
1870                        // Do not forget to return the first non-match back.
1871                        do
1872                        {
1873                            ch = nextToken();
1874                        }
1875                        while ( ch == '-' );
1876    
1877                        pushBack(ch);
1878                        startBlockLevel();
1879                        pushElement( new Element("hr") );
1880                        return popElement( "hr" );
1881                    }
1882    
1883                    pushBack( ch3 );
1884                }
1885                pushBack( ch2 );
1886            }
1887    
1888            pushBack( ch );
1889    
1890            return null;
1891        }
1892    
1893        private Element handleHeading()
1894            throws IOException
1895        {
1896            Element el = null;
1897    
1898            int ch  = nextToken();
1899    
1900            Heading hd = new Heading();
1901    
1902            if( ch == '!' )
1903            {
1904                int ch2 = nextToken();
1905    
1906                if( ch2 == '!' )
1907                {
1908                    String title = peekAheadLine();
1909    
1910                    el = makeHeading( Heading.HEADING_LARGE, title, hd);
1911                }
1912                else
1913                {
1914                    pushBack( ch2 );
1915                    String title = peekAheadLine();
1916                    el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1917                }
1918            }
1919            else
1920            {
1921                pushBack( ch );
1922                String title = peekAheadLine();
1923                el = makeHeading( Heading.HEADING_SMALL, title, hd );
1924            }
1925    
1926            callHeadingListenerChain( hd );
1927    
1928            m_lastHeading = hd;
1929            
1930            if( el != null ) pushElement(el);
1931    
1932            return el;
1933        }
1934    
1935        /**
1936         *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1937         *  EOL from the stream.
1938         */
1939        private StringBuilder readUntilEOL()
1940            throws IOException
1941        {
1942            int ch;
1943            StringBuilder buf = new StringBuilder( 256 );
1944    
1945            while( true )
1946            {
1947                ch = nextToken();
1948    
1949                if( ch == -1 )
1950                    break;
1951    
1952                buf.append( (char) ch );
1953    
1954                if( ch == '\n' )
1955                    break;
1956            }
1957            return buf;
1958        }
1959    
    /**
     *  Controls whether italic is restarted after a paragraph shift.
     *  startBlockLevel() stores here the value m_isitalic had just before
     *  it was reset.
     */

    private boolean m_restartitalic = false;
    /**
     *  The same as m_restartitalic, but for bold: startBlockLevel() stores
     *  here the value m_isbold had just before it was reset.
     */
    private boolean m_restartbold   = false;

    // NOTE(review): presumably tracks whether the parser is at the start of a line;
    // it is not used in the nearby methods, so confirm against the main parsing loop.
    private boolean m_newLine;
1966    
1967        /**
1968         *  Starts a block level element, therefore closing
1969         *  a potential open paragraph tag.
1970         */
1971        private void startBlockLevel()
1972        {
1973            // These may not continue over block level limits in XHTML
1974    
1975            popElement("i");
1976            popElement("b");
1977            popElement("tt");
1978    
1979            if( m_isOpenParagraph )
1980            {
1981                m_isOpenParagraph = false;
1982                popElement("p");
1983                m_plainTextBuf.append("\n"); // Just small beautification
1984            }
1985    
1986            m_restartitalic = m_isitalic;
1987            m_restartbold   = m_isbold;
1988    
1989            m_isitalic = false;
1990            m_isbold   = false;
1991        }
1992    
1993        private static String getListType( char c )
1994        {
1995            if( c == '*' )
1996            {
1997                return "ul";
1998            }
1999            else if( c == '#' )
2000            {
2001                return "ol";
2002            }
2003            throw new InternalWikiException("Parser got faulty list type: "+c);
2004        }
2005        /**
2006         *  Like original handleOrderedList() and handleUnorderedList()
2007         *  however handles both ordered ('#') and unordered ('*') mixed together.
2008         */
2009    
2010        // FIXME: Refactor this; it's a bit messy.
2011    
2012        private Element handleGeneralList()
2013            throws IOException
2014        {
2015             startBlockLevel();
2016    
2017             String strBullets = readWhile( "*#" );
2018             // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
2019             int numBullets = strBullets.length();
2020    
2021             // override the beginning portion of bullet pattern to be like the previous
2022             // to simulate PHPWiki style lists
2023    
2024             if(m_allowPHPWikiStyleLists)
2025             {
2026                 // only substitute if different
2027                 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
2028                       (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
2029                 {
2030                     if(numBullets <= m_genlistlevel)
2031                     {
2032                         // Substitute all but the last character (keep the expressed bullet preference)
2033                         strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
2034                                       + strBullets.substring(numBullets-1, numBullets);
2035                     }
2036                     else
2037                     {
2038                         strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
2039                     }
2040                 }
2041             }
2042    
2043             //
2044             //  Check if this is still of the same type
2045             //
2046             if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
2047                (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
2048             {
2049                 if( numBullets > m_genlistlevel )
2050                 {
2051                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );
2052    
2053                     for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
2054                     {
2055                         // bullets are growing, get from new bullet list
2056                         pushElement( new Element("li") );
2057                         pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
2058                     }
2059                 }
2060                 else if( numBullets < m_genlistlevel )
2061                 {
2062                     //  Close the previous list item.
2063                     // buf.append( m_renderer.closeListItem() );
2064                     popElement( "li" );
2065    
2066                     for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
2067                     {
2068                         // bullets are shrinking, get from old bullet list
2069    
2070                         popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
2071                         if( m_genlistlevel > 0 )
2072                         {
2073                             popElement( "li" );
2074                         }
2075    
2076                     }
2077                 }
2078                 else
2079                 {
2080                     if( m_genlistlevel > 0 )
2081                     {
2082                         popElement( "li" );
2083                     }
2084                 }
2085             }
2086             else
2087             {
2088                 //
2089                 //  The pattern has changed, unwind and restart
2090                 //
2091                 int  numEqualBullets;
2092                 int  numCheckBullets;
2093    
2094                 // find out how much is the same
2095                 numEqualBullets = 0;
2096                 numCheckBullets = Math.min(numBullets,m_genlistlevel);
2097    
2098                 while( numEqualBullets < numCheckBullets )
2099                 {
2100                     // if the bullets are equal so far, keep going
2101                     if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
2102                         numEqualBullets++;
2103                     // otherwise giveup, we have found how many are equal
2104                     else
2105                         break;
2106                 }
2107    
2108                 //unwind
2109                 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
2110                 {
2111                     popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
2112                     if( m_genlistlevel > numBullets )
2113                     {
2114                         popElement("li");
2115                     }
2116                 }
2117    
2118                 //rewind
2119    
2120                 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
2121                 for(int i = numEqualBullets; i < numBullets; i++)
2122                 {
2123                     pushElement( new Element("li") );
2124                     pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
2125                 }
2126                 m_genlistlevel = numBullets;
2127             }
2128    
2129             //
2130             //  Push a new list item, and eat away any extra whitespace
2131             //
2132             pushElement( new Element("li") );
2133             readWhile(" ");
2134    
2135             // work done, remember the new bullet list (in place of old one)
2136             m_genlistBulletBuffer.setLength(0);
2137             m_genlistBulletBuffer.append(strBullets);
2138    
2139             return m_currentElement;
2140        }
2141    
2142        private Element unwindGeneralList()
2143        {
2144            //unwind
2145            for( ; m_genlistlevel > 0; m_genlistlevel-- )
2146            {
2147                popElement( "li" );
2148                popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
2149            }
2150    
2151            m_genlistBulletBuffer.setLength(0);
2152    
2153            return null;
2154        }
2155    
2156    
2157        private Element handleDefinitionList()
2158            throws IOException
2159        {
2160            if( !m_isdefinition )
2161            {
2162                m_isdefinition = true;
2163    
2164                startBlockLevel();
2165    
2166                pushElement( new Element("dl") );
2167                return pushElement( new Element("dt") );
2168            }
2169    
2170            return null;
2171        }
2172    
2173        private Element handleOpenbracket()
2174            throws IOException
2175        {
2176            StringBuilder sb = new StringBuilder(40);
2177            int pos = getPosition();
2178            int ch = nextToken();
2179            boolean isPlugin = false;
2180    
2181            if( ch == '[' )
2182            {
2183                if( m_wysiwygEditorMode )
2184                {
2185                    sb.append( '[' );
2186                }
2187    
2188                sb.append( (char)ch );
2189    
2190                while( (ch = nextToken()) == '[' )
2191                {
2192                    sb.append( (char)ch );
2193                }
2194            }
2195    
2196    
2197            if( ch == '{' )
2198            {
2199                isPlugin = true;
2200            }
2201    
2202            pushBack( ch );
2203    
2204            if( sb.length() > 0 )
2205            {
2206                m_plainTextBuf.append( sb );
2207                return m_currentElement;
2208            }
2209    
2210            //
2211            //  Find end of hyperlink
2212            //
2213    
2214            ch = nextToken();
2215            int nesting = 1;    // Check for nested plugins
2216    
2217            while( ch != -1 )
2218            {
2219                int ch2 = nextToken(); pushBack(ch2);
2220    
2221                if( isPlugin )
2222                {
2223                    if( ch == '[' && ch2 == '{' )
2224                    {
2225                        nesting++;
2226                    }
2227                    else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
2228                    {
2229                        break;
2230                    }
2231                    else if( ch == '}' && ch2 == ']' )
2232                    {
2233                        // NB: This will be decremented once at the end
2234                        nesting--;
2235                    }
2236                }
2237                else
2238                {
2239                    if( ch == ']' )
2240                    {
2241                        break;
2242                    }
2243                }
2244    
2245                sb.append( (char) ch );
2246    
2247                ch = nextToken();
2248            }
2249    
2250            //
2251            //  If the link is never finished, do some tricks to display the rest of the line
2252            //  unchanged.
2253            //
2254            if( ch == -1 )
2255            {
2256                log.debug("Warning: unterminated link detected!");
2257                m_isEscaping = true;
2258                m_plainTextBuf.append( sb );
2259                flushPlainText();
2260                m_isEscaping = false;
2261                return m_currentElement;
2262            }
2263    
2264            return handleHyperlinks( sb.toString(), pos );
2265        }
2266    
2267        /**
2268         *  Reads the stream until the current brace is closed or stream end.
2269         */
2270        private String readBraceContent( char opening, char closing )
2271            throws IOException
2272        {
2273            StringBuilder sb = new StringBuilder(40);
2274            int braceLevel = 1;
2275            int ch;
2276            while(( ch = nextToken() ) != -1 )
2277            {
2278                if( ch == '\\' )
2279                {
2280                    continue;
2281                }
2282                else if ( ch == opening )
2283                {
2284                    braceLevel++;
2285                }
2286                else if ( ch == closing )
2287                {
2288                    braceLevel--;
2289                    if (braceLevel==0)
2290                    {
2291                      break;
2292                    }
2293                }
2294                sb.append( (char)ch );
2295            }
2296            return sb.toString();
2297        }
2298    
2299    
2300        /**
2301         *  Handles constructs of type %%(style) and %%class
2302         * @param newLine
2303         * @return An Element containing the div or span, depending on the situation.
2304         * @throws IOException
2305         */
2306        private Element handleDiv( boolean newLine )
2307            throws IOException
2308        {
2309            int ch = nextToken();
2310            Element el = null;
2311    
2312            if( ch == '%' )
2313            {
2314                String style = null;
2315                String clazz = null;
2316    
2317                ch = nextToken();
2318    
2319                //
2320                //  Style or class?
2321                //
2322                if( ch == '(' )
2323                {
2324                    style = readBraceContent('(',')');
2325                }
2326                else if( Character.isLetter( (char) ch ) )
2327                {
2328                    pushBack( ch );
2329                    clazz = readUntil( " \t\n\r" );
2330                    ch = nextToken();
2331    
2332                    //
2333                    //  Pop out only spaces, so that the upcoming EOL check does not check the
2334                    //  next line.
2335                    //
2336                    if( ch == '\n' || ch == '\r' )
2337                    {
2338                        pushBack(ch);
2339                    }
2340                }
2341                else
2342                {
2343                    //
2344                    // Anything else stops.
2345                    //
2346    
2347                    pushBack(ch);
2348    
2349                    try
2350                    {
2351                        Boolean isSpan = m_styleStack.pop();
2352    
2353                        if( isSpan == null )
2354                        {
2355                            // Fail quietly
2356                        }
2357                        else if( isSpan.booleanValue() )
2358                        {
2359                            el = popElement( "span" );
2360                        }
2361                        else
2362                        {
2363                            el = popElement( "div" );
2364                        }
2365                    }
2366                    catch( EmptyStackException e )
2367                    {
2368                        log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
2369                        return m_currentElement;
2370                    }
2371    
2372                    return el;
2373                }
2374    
2375                //
2376                //  Check if there is an attempt to do something nasty
2377                //
2378                
2379                try
2380                {
2381                    style = StringEscapeUtils.unescapeHtml(style);
2382                    if( style != null && style.indexOf("javascript:") != -1 )
2383                    {
2384                        log.debug("Attempt to output javascript within CSS:"+style);
2385                        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2386                        return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
2387                    }
2388                }
2389                catch( NumberFormatException e )
2390                {
2391                    //
2392                    //  If there are unknown entities, we don't want the parser to stop.
2393                    //
2394                    ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2395                    String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
2396                    return addElement( makeError( msg ) );
2397                }
2398    
2399                //
2400                //  Decide if we should open a div or a span?
2401                //
2402                String eol = peekAheadLine();
2403    
2404                if( eol.trim().length() > 0 )
2405                {
2406                    // There is stuff after the class
2407    
2408                    el = new Element("span");
2409    
2410                    m_styleStack.push( Boolean.TRUE );
2411                }
2412                else
2413                {
2414                    startBlockLevel();
2415                    el = new Element("div");
2416                    m_styleStack.push( Boolean.FALSE );
2417                }
2418    
2419                if( style != null ) el.setAttribute("style", style);
2420                if( clazz != null ) el.setAttribute("class", clazz );
2421                el = pushElement( el );
2422    
2423                return el;
2424            }
2425    
2426            pushBack(ch);
2427    
2428            return el;
2429        }
2430    
2431        private Element handleSlash( boolean newLine )
2432            throws IOException
2433        {
2434            int ch = nextToken();
2435    
2436            pushBack(ch);
2437            if( ch == '%' && !m_styleStack.isEmpty() )
2438            {
2439                return handleDiv( newLine );
2440            }
2441    
2442            return null;
2443        }
2444    
2445        private Element handleBar( boolean newLine )
2446            throws IOException
2447        {
2448            Element el = null;
2449    
2450            if( !m_istable && !newLine )
2451            {
2452                return null;
2453            }
2454    
2455            //
2456            //  If the bar is in the first column, we will either start
2457            //  a new table or continue the old one.
2458            //
2459    
2460            if( newLine )
2461            {
2462                if( !m_istable )
2463                {
2464                    startBlockLevel();
2465                    el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2466                    m_istable = true;
2467                    m_rowNum = 0;
2468                }
2469    
2470                m_rowNum++;
2471                Element tr = ( m_rowNum % 2 != 0 )
2472                           ? new Element("tr").setAttribute("class", "odd")
2473                           : new Element("tr");
2474                el = pushElement( tr );
2475            }
2476    
2477            //
2478            //  Check out which table cell element to start;
2479            //  a header element (th) or a regular element (td).
2480            //
2481            int ch = nextToken();
2482    
2483            if( ch == '|' )
2484            {
2485                if( !newLine )
2486                {
2487                    el = popElement("th");
2488                    if( el == null ) popElement("td");
2489                }
2490                el = pushElement( new Element("th") );
2491            }
2492            else
2493            {
2494                if( !newLine )
2495                {
2496                    el = popElement("td");
2497                    if( el == null ) popElement("th");
2498                }
2499    
2500                el = pushElement( new Element("td") );
2501    
2502                pushBack( ch );
2503            }
2504    
2505            return el;
2506        }
2507    
2508        /**
2509         *  Generic escape of next character or entity.
2510         */
2511        private Element handleTilde()
2512            throws IOException
2513        {
2514            int ch = nextToken();
2515    
2516            if( ch == ' ' )
2517            {
2518                if( m_wysiwygEditorMode )
2519                {
2520                    m_plainTextBuf.append( "~ " );
2521                }
2522                return m_currentElement;
2523            }
2524    
2525            if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2526                ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2527                ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2528            {
2529                if( m_wysiwygEditorMode )
2530                {
2531                    m_plainTextBuf.append( '~' );
2532                }
2533    
2534                m_plainTextBuf.append( (char)ch );
2535                m_plainTextBuf.append(readWhile( ""+(char)ch ));
2536                return m_currentElement;
2537            }
2538    
2539            // No escape.
2540            pushBack( ch );
2541    
2542            return null;
2543        }
2544    
2545        private void fillBuffer( Element startElement )
2546            throws IOException
2547        {
2548            m_currentElement = startElement;
2549    
2550            boolean quitReading = false;
2551            m_newLine = true;
2552            disableOutputEscaping();
2553    
2554            while(!quitReading)
2555            {
2556                int ch = nextToken();
2557    
2558                if( ch == -1 ) break;
2559    
2560                //
2561                //  Check if we're actually ending the preformatted mode.
2562                //  We still must do an entity transformation here.
2563                //
2564                if( m_isEscaping )
2565                {
2566                    if( ch == '}' )
2567                    {
2568                        if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2569                    }
2570                    else if( ch == -1 )
2571                    {
2572                        quitReading = true;
2573                    }
2574                    else if( ch == '\r' )
2575                    {
2576                        // DOS line feeds we ignore.
2577                    }
2578                    else if( ch == '<' )
2579                    {
2580                        m_plainTextBuf.append( "&lt;" );
2581                    }
2582                    else if( ch == '>' )
2583                    {
2584                        m_plainTextBuf.append( "&gt;" );
2585                    }
2586                    else if( ch == '&' )
2587                    {
2588                        m_plainTextBuf.append( "&amp;" );
2589                    }
2590                    else if( ch == '~' )
2591                    {
2592                        String braces = readWhile("}");
2593                        if( braces.length() >= 3 )
2594                        {
2595                            m_plainTextBuf.append("}}}");
2596    
2597                            braces = braces.substring(3);
2598                        }
2599                        else
2600                        {
2601                            m_plainTextBuf.append( (char) ch );
2602                        }
2603    
2604                        for( int i = braces.length()-1; i >= 0; i-- )
2605                        {
2606                            pushBack(braces.charAt(i));
2607                        }
2608                    }
2609                    else
2610                    {
2611                        m_plainTextBuf.append( (char) ch );
2612                    }
2613    
2614                    continue;
2615                }
2616    
2617                //
2618                //  An empty line stops a list
2619                //
2620                if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2621                {
2622                    m_plainTextBuf.append(unwindGeneralList());
2623                }
2624    
2625                if( m_newLine && ch != '|' && m_istable )
2626                {
2627                    popElement("table");
2628                    m_istable = false;
2629                }
2630    
2631                int skip = IGNORE;
2632                
2633                //
2634                //  Do the actual parsing and catch any errors.
2635                //
2636                try
2637                {
2638                    skip = parseToken( ch );
2639                }
2640                catch( IllegalDataException e )
2641                {
2642                    log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2643                    
2644                    makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2645                }
2646                
2647                //
2648                //   The idea is as follows:  If the handler method returns
2649                //   an element (el != null), it is assumed that it has been
2650                //   added in the stack.  Otherwise the character is added
2651                //   as is to the plaintext buffer.
2652                //
2653                //   For the transition phase, if s != null, it also gets
2654                //   added in the plaintext buffer.
2655                //
2656    
2657                switch( skip )
2658                {
2659                    case ELEMENT:
2660                        m_newLine = false;
2661                        break;
2662    
2663                    case CHARACTER:
2664                        m_plainTextBuf.append( (char) ch );
2665                        m_newLine = false;
2666                        break;
2667    
2668                    case IGNORE:
2669                    default:
2670                        break;
2671                }
2672            }
2673    
2674            closeHeadings();
2675            popElement("domroot");
2676        }
2677    
2678        private String cleanupSuspectData( String s )
2679        {
2680            StringBuilder sb = new StringBuilder( s.length() );
2681            
2682            for( int i = 0; i < s.length(); i++ )
2683            {
2684                char c = s.charAt(i);
2685                
2686                if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2687                else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2688            }
2689            
2690            return sb.toString();
2691        }
2692        
    /**
     *  The token is a plain character.  When parseToken() returns this,
     *  fillBuffer() appends the character to the plain text buffer and
     *  clears the start-of-line flag.
     */
    protected static final int CHARACTER = 0;
    
    /**
     *  The token is a wikimarkup element.  When parseToken() returns this,
     *  fillBuffer() does not output the character itself; it only clears
     *  the start-of-line flag.
     */
    protected static final int ELEMENT   = 1;
    
    /**
     *  The token is to be ignored.  fillBuffer() neither outputs the
     *  character nor touches the start-of-line flag.  This is also the value
     *  fillBuffer() uses when parseToken() throws an IllegalDataException.
     */
    protected static final int IGNORE    = 2;
2701    
2702        /**
2703         *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
2704         *  you think this was a wiki markup element, and IGNORE, if you think
2705         *  we should ignore this altogether.
2706         *  <p>
2707         *  To add your own MarkupParser, you can override this method, but it
2708         *  is recommended that you call super.parseToken() as well to gain advantage
2709         *  of JSPWiki's own markup.  You can call it at the start of your own
2710         *  parseToken() or end - it does not matter.
2711         *
2712         * @param ch The character under investigation
2713         * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2714         * @throws IOException If parsing fails.
2715         */
2716        protected int parseToken( int ch )
2717            throws IOException
2718        {
2719            Element el = null;
2720    
2721            //
2722            //  Now, check the incoming token.
2723            //
2724            switch( ch )
2725            {
2726              case '\r':
2727                // DOS linefeeds we forget
2728                return IGNORE;
2729    
2730              case '\n':
2731                //
2732                //  Close things like headings, etc.
2733                //
2734    
2735                // FIXME: This is not really very fast
2736                
2737                closeHeadings();
2738                  
2739                popElement("dl"); // Close definition lists.
2740                if( m_istable )
2741                {
2742                    popElement("tr");
2743                }
2744    
2745                m_isdefinition = false;
2746    
2747                if( m_newLine )
2748                {
2749                    // Paragraph change.
2750                    startBlockLevel();
2751    
2752                    //
2753                    //  Figure out which elements cannot be enclosed inside
2754                    //  a <p></p> pair according to XHTML rules.
2755                    //
2756                    String nextLine = peekAheadLine();
2757                    if( nextLine.length() == 0 ||
2758                        (nextLine.length() > 0 &&
2759                         !nextLine.startsWith("{{{") &&
2760                         !nextLine.startsWith("----") &&
2761                         !nextLine.startsWith("%%") &&
2762                         "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2763                    {
2764                        pushElement( new Element("p") );
2765                        m_isOpenParagraph = true;
2766    
2767                        if( m_restartitalic )
2768                        {
2769                            pushElement( new Element("i") );
2770                            m_isitalic = true;
2771                            m_restartitalic = false;
2772                        }
2773                        if( m_restartbold )
2774                        {
2775                            pushElement( new Element("b") );
2776                            m_isbold = true;
2777                            m_restartbold = false;
2778                        }
2779                    }
2780                }
2781                else
2782                {
2783                    m_plainTextBuf.append("\n");
2784                    m_newLine = true;
2785                }
2786                return IGNORE;
2787    
2788    
2789              case '\\':
2790                el = handleBackslash();
2791                break;
2792    
2793              case '_':
2794                el = handleUnderscore();
2795                break;
2796    
2797              case '\'':
2798                el = handleApostrophe();
2799                break;
2800    
2801              case '{':
2802                el = handleOpenbrace( m_newLine );
2803                break;
2804    
2805              case '}':
2806                el = handleClosebrace();
2807                break;
2808    
2809              case '-':
2810                if( m_newLine )
2811                    el = handleDash();
2812    
2813                break;
2814    
2815              case '!':
2816                if( m_newLine )
2817                {
2818                    el = handleHeading();
2819                }
2820                break;
2821    
2822              case ';':
2823                if( m_newLine )
2824                {
2825                    el = handleDefinitionList();
2826                }
2827                break;
2828    
2829              case ':':
2830                if( m_isdefinition )
2831                {
2832                    popElement("dt");
2833                    el = pushElement( new Element("dd") );
2834                    m_isdefinition = false;
2835                }
2836                break;
2837    
2838              case '[':
2839                el = handleOpenbracket();
2840                break;
2841    
2842              case '*':
2843                if( m_newLine )
2844                {
2845                    pushBack('*');
2846                    el = handleGeneralList();
2847                }
2848                break;
2849    
2850              case '#':
2851                if( m_newLine )
2852                {
2853                    pushBack('#');
2854                    el = handleGeneralList();
2855                }
2856                break;
2857    
2858              case '|':
2859                el = handleBar( m_newLine );
2860                break;
2861    
2862              case '~':
2863                el = handleTilde();
2864                break;
2865    
2866              case '%':
2867                el = handleDiv( m_newLine );
2868                break;
2869    
2870              case '/':
2871                el = handleSlash( m_newLine );
2872                break;
2873    
2874              default:
2875                break;
2876            }
2877    
2878            return el != null ? ELEMENT : CHARACTER;
2879        }
2880    
2881        private void closeHeadings()
2882        {
2883            if( m_lastHeading != null && !m_wysiwygEditorMode )
2884            {
2885                // Add the hash anchor element at the end of the heading
2886                addElement( new Element("a").setAttribute( "class","hashlink" ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2887                m_lastHeading = null;
2888            }
2889            popElement("h2");
2890            popElement("h3");
2891            popElement("h4");
2892        }
2893    
2894        /**
2895         *  Parses the entire document from the Reader given in the constructor or
2896         *  set by {@link #setInputReader(Reader)}.
2897         *  
2898         *  @return A WikiDocument, ready to be passed to the renderer.
2899         *  @throws IOException If parsing cannot be accomplished.
2900         */
2901        public WikiDocument parse()
2902            throws IOException
2903        {
2904            WikiDocument d = new WikiDocument( m_context.getPage() );
2905            d.setContext( m_context );
2906    
2907            Element rootElement = new Element("domroot");
2908    
2909            d.setRootElement( rootElement );
2910    
2911            fillBuffer( rootElement );
2912    
2913            paragraphify(rootElement);
2914    
2915            return d;
2916        }
2917    
2918        /**
2919         *  Checks out that the first paragraph is correctly installed.
2920         *
2921         *  @param rootElement
2922         */
2923        private void paragraphify(Element rootElement)
2924        {
2925            //
2926            //  Add the paragraph tag to the first paragraph
2927            //
2928            List kids = rootElement.getContent();
2929    
2930            if( rootElement.getChild("p") != null )
2931            {
2932                ArrayList<Content> ls = new ArrayList<Content>();
2933                int idxOfFirstContent = 0;
2934                int count = 0;
2935    
2936                for( Iterator i = kids.iterator(); i.hasNext(); count++ )
2937                {
2938                    Content c = (Content) i.next();
2939                    if( c instanceof Element )
2940                    {
2941                        String name = ((Element)c).getName();
2942                        if( isBlockLevel(name) ) break;
2943                    }
2944    
2945                    if( !(c instanceof ProcessingInstruction) )
2946                    {
2947                        ls.add( c );
2948                        if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2949                    }
2950                }
2951    
2952                //
2953                //  If there were any elements, then add a new <p> (unless it would
2954                //  be an empty one)
2955                //
2956                if( ls.size() > 0 )
2957                {
2958                    Element newel = new Element("p");
2959    
2960                    for( Iterator i = ls.iterator(); i.hasNext(); )
2961                    {
2962                        Content c = (Content) i.next();
2963    
2964                        c.detach();
2965                        newel.addContent(c);
2966                    }
2967    
2968                    //
2969                    // Make sure there are no empty <p/> tags added.
2970                    //
2971                    if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2972                        rootElement.addContent(idxOfFirstContent, newel);
2973                }
2974            }
2975        }
2976    
2977    
2978        /**
2979         *  Compares two Strings, and if one starts with the other, then
2980         *  returns null.  Otherwise just like the normal Comparator
2981         *  for strings.
2982         *
2983         *  @since
2984         */
2985        private static class StartingComparator implements Comparator<String>
2986        {
2987            public int compare( String s1, String s2 )
2988            {
2989                if( s1.length() > s2.length() )
2990                {
2991                    if( s1.startsWith(s2) && s2.length() > 1 ) return 0;
2992                }
2993                else
2994                {
2995                    if( s2.startsWith(s1) && s1.length() > 1 ) return 0;
2996                }
2997    
2998                return s1.compareTo( s2 );
2999            }
3000    
3001        }
3002    
3003    
3004    }
3005