001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import java.io.IOException;
022import java.io.Reader;
023import java.io.StringReader;
024import java.text.MessageFormat;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.Comparator;
030import java.util.EmptyStackException;
031import java.util.HashMap;
032import java.util.Iterator;
033import java.util.List;
034import java.util.Map;
035import java.util.Properties;
036import java.util.ResourceBundle;
037import java.util.Stack;
038
039import javax.xml.transform.Result;
040
041import org.apache.commons.lang.StringEscapeUtils;
042import org.apache.commons.lang.StringUtils;
043import org.apache.log4j.Logger;
044import org.apache.oro.text.GlobCompiler;
045import org.apache.oro.text.regex.MalformedPatternException;
046import org.apache.oro.text.regex.MatchResult;
047import org.apache.oro.text.regex.Pattern;
048import org.apache.oro.text.regex.PatternCompiler;
049import org.apache.oro.text.regex.PatternMatcher;
050import org.apache.oro.text.regex.Perl5Compiler;
051import org.apache.oro.text.regex.Perl5Matcher;
052import org.apache.wiki.InternalWikiException;
053import org.apache.wiki.StringTransmutator;
054import org.apache.wiki.VariableManager;
055import org.apache.wiki.WikiContext;
056import org.apache.wiki.WikiPage;
057import org.apache.wiki.api.exceptions.PluginException;
058import org.apache.wiki.api.exceptions.ProviderException;
059import org.apache.wiki.api.plugin.WikiPlugin;
060import org.apache.wiki.attachment.Attachment;
061import org.apache.wiki.attachment.AttachmentManager;
062import org.apache.wiki.auth.WikiSecurityException;
063import org.apache.wiki.auth.acl.Acl;
064import org.apache.wiki.i18n.InternationalizationManager;
065import org.apache.wiki.preferences.Preferences;
066import org.apache.wiki.render.CleanTextRenderer;
067import org.apache.wiki.render.RenderingManager;
068import org.apache.wiki.util.TextUtil;
069import org.jdom2.Attribute;
070import org.jdom2.Content;
071import org.jdom2.Element;
072import org.jdom2.IllegalDataException;
073import org.jdom2.ProcessingInstruction;
074import org.jdom2.Verifier;
075
076/**
077 *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
078 *  heart and soul of JSPWiki : make sure you test properly anything that is added,
079 *  or else it breaks down horribly.
080 *
081 *  @since  2.4
082 */
083public class JSPWikiMarkupParser extends MarkupParser {
084
085    /** Name of the outlink image; relative path to the JSPWiki directory. */
086    private static final String OUTLINK_IMAGE = "images/out.png";
087
088    /** The value for anchor element <tt>class</tt> attributes when used
089      * for wiki page (normal) links. The value is "wikipage". */
090    public static final String CLASS_WIKIPAGE = "wikipage";
091
092    /** The value for anchor element <tt>class</tt> attributes when used
093      * for edit page links. The value is "createpage". */
094    public static final String CLASS_EDITPAGE = "createpage";
095
096    /** The value for anchor element <tt>class</tt> attributes when used
097      * for interwiki page links. The value is "interwiki". */
098    public static final String CLASS_INTERWIKI = "interwiki";
099
100    protected static final int              READ          = 0;
101    protected static final int              EDIT          = 1;
102    protected static final int              EMPTY         = 2;  // Empty message
103    protected static final int              LOCAL         = 3;
104    protected static final int              LOCALREF      = 4;
105    protected static final int              IMAGE         = 5;
106    protected static final int              EXTERNAL      = 6;
107    protected static final int              INTERWIKI     = 7;
108    protected static final int              IMAGELINK     = 8;
109    protected static final int              IMAGEWIKILINK = 9;
110    protected static final int              ATTACHMENT    = 10;
111
112    private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );
113
114    private boolean        m_isbold       = false;
115    private boolean        m_isitalic     = false;
116    private boolean        m_istable      = false;
117    private boolean        m_isPre        = false;
118    private boolean        m_isEscaping   = false;
119    private boolean        m_isdefinition = false;
120    private boolean        m_isPreBlock   = false;
121
122    /** Contains style information, in multiple forms. */
123    private Stack<Boolean> m_styleStack   = new Stack<Boolean>();
124
125     // general list handling
126    private int            m_genlistlevel = 0;
127    private StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
128    private boolean        m_allowPHPWikiStyleLists = true;
129
130
131    private boolean        m_isOpenParagraph = false;
132
133    /** Keeps image regexp Patterns */
134    private List<Pattern>  m_inlineImagePatterns;
135
136    /** Parser for extended link functionality. */
137    private LinkParser     m_linkParser = new LinkParser();
138
139    private PatternMatcher m_inlineMatcher = new Perl5Matcher();
140
141    /** Keeps track of any plain text that gets put in the Text nodes */
142    private StringBuilder  m_plainTextBuf = new StringBuilder(20);
143
144    private Element        m_currentElement;
145
146    /** Keep track of duplicate header names.  */
147    private Map<String, Integer>   m_titleSectionCounter = new HashMap<String, Integer>();
148
149    /** If true, consider CamelCase hyperlinks as well. */
150    public static final String     PROP_CAMELCASELINKS   = "jspwiki.translatorReader.camelCaseLinks";
151
152    /** If true, all hyperlinks are translated as well, regardless whether they
153        are surrounded by brackets. */
154    public static final String     PROP_PLAINURIS        = "jspwiki.translatorReader.plainUris";
155
156    /** If true, all outward links (external links) have a small link image appended. */
157    public static final String     PROP_USEOUTLINKIMAGE  = "jspwiki.translatorReader.useOutlinkImage";
158
159    /** If true, all outward attachment info links have a small link image appended. */
160    public static final String     PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";
161
162    /** If set to "true", all external links are tagged with 'rel="nofollow"' */
163    public static final String     PROP_USERELNOFOLLOW   = "jspwiki.translatorReader.useRelNofollow";
164
165    /** If true, then considers CamelCase links as well. */
166    private boolean                m_camelCaseLinks      = false;
167
168    /** If true, then generate special output for wysiwyg editing in certain cases */
169    private boolean                m_wysiwygEditorMode     = false;
170
171    /** If true, consider URIs that have no brackets as well. */
172    // FIXME: Currently reserved, but not used.
173    private boolean                m_plainUris           = false;
174
175    /** If true, all outward links use a small link image. */
176    private boolean                m_useOutlinkImage     = true;
177
178    private boolean                m_useAttachmentImage  = true;
179
180    /** If true, allows raw HTML. */
181    private boolean                m_allowHTML           = false;
182
183    private boolean                m_useRelNofollow      = false;
184
185    private PatternCompiler        m_compiler = new Perl5Compiler();
186
187    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";
188
189    private PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();
190    private Pattern                m_camelCasePattern;
191
192    private int                    m_rowNum              = 1;
193
194    private Heading                m_lastHeading         = null;
195
196    /**
197     *  This list contains all IANA registered URI protocol
198     *  types as of September 2004 + a few well-known extra types.
199     *
200     *  JSPWiki recognises all of them as external links.
201     *
202     *  This array is sorted during class load, so you can just dump
203     *  here whatever you want in whatever order you want.
204     */
205    static final String[] EXTERNAL_LINKS = {
206        "http:", "ftp:", "https:", "mailto:",
207        "news:", "file:", "rtsp:", "mms:", "ldap:",
208        "gopher:", "nntp:", "telnet:", "wais:",
209        "prospero:", "z39.50s", "z39.50r", "vemmi:",
210        "imap:", "nfs:", "acap:", "tip:", "pop:",
211        "dav:", "opaquelocktoken:", "sip:", "sips:",
212        "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps",
213        "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:",
214        "h323:", "ipp:", "tftp:", "mupdate:", "pres:",
215        "im:", "mtqp", "smb:" };
216
217    private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";
218
219    private static final String CAMELCASE_PATTERN     = "JSPWikiMarkupParser.camelCasePattern";
220
221    private static final String[] CLASS_TYPES =
222    {
223       CLASS_WIKIPAGE,
224       CLASS_EDITPAGE,
225       "",
226       "footnote",
227       "footnoteref",
228       "",
229       "external",
230       CLASS_INTERWIKI,
231       "external",
232       CLASS_WIKIPAGE,
233       "attachment"
234    };
235
236
237    /**
238     *  This Comparator is used to find an external link from c_externalLinks.  It
239     *  checks if the link starts with the other arraythingie.
240     */
241    private static Comparator<String> c_startingComparator = new StartingComparator();
242
243    static
244    {
245        Arrays.sort( EXTERNAL_LINKS );
246    }
247
248    /**
249     *  Creates a markup parser.
250     *
251     *  @param context The WikiContext which controls the parsing
252     *  @param in Where the data is read from.
253     */
254    public JSPWikiMarkupParser( WikiContext context, Reader in )
255    {
256        super( context, in );
257        initialize();
258    }
259
260    // FIXME: parsers should be pooled for better performance.
261    @SuppressWarnings("unchecked")
262    private void initialize()
263    {
264        PatternCompiler compiler         = new GlobCompiler();
265        List<Pattern>   compiledpatterns;
266
267        //
268        //  We cache compiled patterns in the engine, since their creation is
269        //  really expensive
270        //
271        compiledpatterns = (List<Pattern>)m_engine.getAttribute( INLINE_IMAGE_PATTERNS );
272
273        if( compiledpatterns == null )
274        {
275            compiledpatterns = new ArrayList<Pattern>(20);
276            Collection< String > ptrns = m_engine.getAllInlinedImagePatterns();
277
278            //
279            //  Make them into Regexp Patterns.  Unknown patterns
280            //  are ignored.
281            //
282            for( Iterator< String > i = ptrns.iterator(); i.hasNext(); )
283            {
284                try
285                {
286                    compiledpatterns.add( compiler.compile( i.next(),
287                                                            GlobCompiler.DEFAULT_MASK|GlobCompiler.READ_ONLY_MASK ) );
288                }
289                catch( MalformedPatternException e )
290                {
291                    log.error("Malformed pattern in properties: ", e );
292                }
293            }
294
295            m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns );
296        }
297
298        m_inlineImagePatterns = Collections.unmodifiableList(compiledpatterns);
299
300        m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN );
301        if( m_camelCasePattern == null )
302        {
303            try
304            {
305                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,
306                                                         Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
307            }
308            catch( MalformedPatternException e )
309            {
310                log.fatal("Internal error: Someone put in a faulty pattern.",e);
311                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader");
312            }
313            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
314        }
315        //
316        //  Set the properties.
317        //
318        Properties props      = m_engine.getWikiProperties();
319
320        String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
321
322        if( cclinks != null )
323        {
324            m_camelCaseLinks = TextUtil.isPositive( cclinks );
325        }
326        else
327        {
328            m_camelCaseLinks  = TextUtil.getBooleanProperty( props,
329                                                             PROP_CAMELCASELINKS,
330                                                             m_camelCaseLinks );
331        }
332
333
334
335        Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE );
336        if( wysiwygVariable != null )
337        {
338            m_wysiwygEditorMode = wysiwygVariable.booleanValue();
339        }
340
341        m_plainUris           = getLocalBooleanProperty( m_context,
342                                                         props,
343                                                         PROP_PLAINURIS,
344                                                         m_plainUris );
345        m_useOutlinkImage     = getLocalBooleanProperty( m_context,
346                                                         props,
347                                                         PROP_USEOUTLINKIMAGE,
348                                                         m_useOutlinkImage );
349        m_useAttachmentImage  = getLocalBooleanProperty( m_context,
350                                                         props,
351                                                         PROP_USEATTACHMENTIMAGE,
352                                                         m_useAttachmentImage );
353        m_allowHTML           = getLocalBooleanProperty( m_context,
354                                                         props,
355                                                         MarkupParser.PROP_ALLOWHTML,
356                                                         m_allowHTML );
357
358        m_useRelNofollow      = getLocalBooleanProperty( m_context,
359                                                         props,
360                                                         PROP_USERELNOFOLLOW,
361                                                         m_useRelNofollow );
362
363        if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null )
364        {
365            disableAccessRules();
366        }
367
368        m_context.getPage().setHasMetadata();
369    }
370
371    /**
372     *  This is just a simple helper method which will first check the context
373     *  if there is already an override in place, and if there is not,
374     *  it will then check the given properties.
375     *
376     *  @param context WikiContext to check first
377     *  @param props   Properties to check next
378     *  @param key     What key are we searching for?
379     *  @param defValue Default value for the boolean
380     *  @return True or false
381     */
382    private static boolean getLocalBooleanProperty( WikiContext context,
383                                                    Properties  props,
384                                                    String      key,
385                                                    boolean     defValue )
386    {
387        Object bool = context.getVariable(key);
388
389        if( bool != null )
390        {
391            return TextUtil.isPositive( (String) bool );
392        }
393
394        return TextUtil.getBooleanProperty( props, key, defValue );
395    }
396
397    /**
398     *  Returns link name, if it exists; otherwise it returns null.
399     */
400    private String linkExists( String page )
401    {
402        try
403        {
404            if( page == null || page.length() == 0 ) return null;
405
406            return m_engine.getFinalPageName( page );
407        }
408        catch( ProviderException e )
409        {
410            log.warn("TranslatorReader got a faulty page name!",e);
411
412            return page;  // FIXME: What would be the correct way to go back?
413        }
414    }
415
416    /**
417     *  Calls a transmutator chain.
418     *
419     *  @param list Chain to call
420     *  @param text Text that should be passed to the mutate() method
421     *              of each of the mutators in the chain.
422     *  @return The result of the mutation.
423     */
424
425    protected String callMutatorChain( Collection list, String text )
426    {
427        if( list == null || list.size() == 0 )
428        {
429            return text;
430        }
431
432        for( Iterator i = list.iterator(); i.hasNext(); )
433        {
434            StringTransmutator m = (StringTransmutator) i.next();
435
436            text = m.mutate( m_context, text );
437        }
438
439        return text;
440    }
441
442    /**
443     * Calls the heading listeners.
444     *
445     * @param param A Heading object.
446     */
447    protected void callHeadingListenerChain( Heading param )
448    {
449        List< HeadingListener > list = m_headingListenerChain;
450
451        for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); )
452        {
453            HeadingListener h = i.next();
454
455            h.headingAdded( m_context, param );
456        }
457    }
458
459    /**
460     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
461     *  if you really know what you are doing.
462     *
463     *  @param type One of the types above
464     *  @param link URL to which to link to
465     *  @param text Link text
466     *  @param section If a particular section identifier is required.
467     *  @return An A element.
468     *  @since 2.4.78
469     */
470    protected Element createAnchor(int type, String link, String text, String section)
471    {
472        text = escapeHTMLEntities( text );
473        section = escapeHTMLEntities( section );
474        Element el = new Element("a");
475        el.setAttribute("class",CLASS_TYPES[type]);
476        el.setAttribute("href",link+section);
477        el.addContent(text);
478        return el;
479    }
480
481    private Element makeLink( int type, String link, String text, String section, Iterator attributes )
482    {
483        Element el = null;
484
485        if( text == null ) text = link;
486
487        text = callMutatorChain( m_linkMutators, text );
488
489        section = (section != null) ? ("#"+section) : "";
490
491        // Make sure we make a link name that can be accepted
492        // as a valid URL.
493
494        if( link.length() == 0 )
495        {
496            type = EMPTY;
497        }
498        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
499
500        switch(type)
501        {
502            case READ:
503                el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
504                break;
505
506            case EDIT:
507                el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
508                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
509
510                break;
511
512            case EMPTY:
513                el = new Element("u").addContent(text);
514                break;
515
516                //
517                //  These two are for local references - footnotes and
518                //  references to footnotes.
519                //  We embed the page name (or whatever WikiContext gives us)
520                //  to make sure the links are unique across Wiki.
521                //
522            case LOCALREF:
523                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
524                break;
525
526            case LOCAL:
527                el = new Element("a").setAttribute("class","footnote");
528                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
529                el.addContent("["+text+"]");
530                break;
531
532                //
533                //  With the image, external and interwiki types we need to
534                //  make sure nobody can put in Javascript or something else
535                //  annoying into the links themselves.  We do this by preventing
536                //  a haxor from stopping the link name short with quotes in
537                //  fillBuffer().
538                //
539            case IMAGE:
540                el = new Element("img").setAttribute("class","inline");
541                el.setAttribute("src",link);
542                el.setAttribute("alt",text);
543                break;
544
545            case IMAGELINK:
546                el = new Element("img").setAttribute("class","inline");
547                el.setAttribute("src",link);
548                el.setAttribute("alt",text);
549                el = createAnchor(IMAGELINK,text,"","").addContent(el);
550                break;
551
552            case IMAGEWIKILINK:
553                String pagelink = m_context.getURL(WikiContext.VIEW,text);
554                el = new Element("img").setAttribute("class","inline");
555                el.setAttribute("src",link);
556                el.setAttribute("alt",text);
557                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
558                break;
559
560            case EXTERNAL:
561                el = createAnchor( EXTERNAL, link, text, section );
562                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
563                break;
564
565            case INTERWIKI:
566                el = createAnchor( INTERWIKI, link, text, section );
567                break;
568
569            case ATTACHMENT:
570                String attlink = m_context.getURL( WikiContext.ATTACH,
571                                                   link );
572
573                String infolink = m_context.getURL( WikiContext.INFO,
574                                                    link );
575
576                String imglink = m_context.getURL( WikiContext.NONE,
577                                                   "images/attachment_small.png" );
578
579                el = createAnchor( ATTACHMENT, attlink, text, "" );
580
581                pushElement(el);
582                popElement(el.getName());
583
584                if( m_useAttachmentImage )
585                {
586                    el = new Element("img").setAttribute("src",imglink);
587                    el.setAttribute("border","0");
588                    el.setAttribute("alt","(info)");
589
590                    el = new Element("a").setAttribute("href",infolink).addContent(el);
591                    el.setAttribute("class","infolink");
592                }
593                else
594                {
595                    el = null;
596                }
597                break;
598
599            default:
600                break;
601        }
602
603        if( el != null && attributes != null )
604        {
605            while( attributes.hasNext() )
606            {
607                Attribute attr = (Attribute)attributes.next();
608                if( attr != null )
609                {
610                    el.setAttribute(attr);
611                }
612            }
613        }
614
615        if( el != null )
616        {
617            flushPlainText();
618            m_currentElement.addContent( el );
619        }
620        return el;
621    }
622
623
624    /**
625     *  Figures out if a link is an off-site link.  This recognizes
626     *  the most common protocols by checking how it starts.
627     *
628     *  @param link The link to check.
629     *  @return true, if this is a link outside of this wiki.
630     *  @since 2.4
631     */
632
633    public static boolean isExternalLink( String link )
634    {
635        int idx = Arrays.binarySearch( EXTERNAL_LINKS, link,
636                                       c_startingComparator );
637
638        //
639        //  We need to check here once again; otherwise we might
640        //  get a match for something like "h".
641        //
642        if( idx >= 0 && link.startsWith(EXTERNAL_LINKS[idx]) ) return true;
643
644        return false;
645    }
646
647    /**
648     *  Returns true, if the link in question is an access
649     *  rule.
650     */
651    private static boolean isAccessRule( String link )
652    {
653        return link.startsWith("{ALLOW") || link.startsWith("{DENY");
654    }
655
656    /**
657     *  Returns true if the link is really command to insert
658     *  a plugin.
659     *  <P>
660     *  Currently we just check if the link starts with "{INSERT",
661     *  or just plain "{" but not "{$".
662     *
663     *  @param link Link text, i.e. the contents of text between [].
664     *  @return True, if this link seems to be a command to insert a plugin here.
665     */
666    public static boolean isPluginLink( String link )
667    {
668        return link.startsWith( "{INSERT" ) ||
669               ( link.startsWith( "{" ) && !link.startsWith( "{$" ) );
670    }
671
672    /**
673     *  Matches the given link to the list of image name patterns
674     *  to determine whether it should be treated as an inline image
675     *  or not.
676     */
677    private boolean isImageLink( String link )
678    {
679        if( m_inlineImages )
680        {
681            link = link.toLowerCase();
682
683            for( Iterator< Pattern >  i = m_inlineImagePatterns.iterator(); i.hasNext(); )
684            {
685                if( m_inlineMatcher.matches( link, i.next() ) )
686                    return true;
687            }
688        }
689
690        return false;
691    }
692
693    private static boolean isMetadata( String link )
694    {
695        return link.startsWith("{SET");
696    }
697
698    /**
699     *  These are all of the HTML 4.01 block-level elements.
700     */
701    private static final String[] BLOCK_ELEMENTS = {
702        "address", "blockquote", "div", "dl", "fieldset", "form",
703        "h1", "h2", "h3", "h4", "h5", "h6",
704        "hr", "noscript", "ol", "p", "pre", "table", "ul"
705    };
706
707    private static boolean isBlockLevel( String name )
708    {
709        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
710    }
711
712    /**
713     *  This method peeks ahead in the stream until EOL and returns the result.
714     *  It will keep the buffers untouched.
715     *
716     *  @return The string from the current position to the end of line.
717     */
718
719    // FIXME: Always returns an empty line, even if the stream is full.
720    private String peekAheadLine()
721        throws IOException
722    {
723        String s = readUntilEOL().toString();
724
725        if( s.length() > PUSHBACK_BUFFER_SIZE )
726        {
727            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
728            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
729        }
730        else
731        {
732            try
733            {
734                pushBack( s );
735            }
736            catch( IOException e )
737            {
738                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
739            }
740        }
741        return s;
742    }
743
744    private int flushPlainText()
745    {
746        int numChars = m_plainTextBuf.length();
747
748        if( numChars > 0 )
749        {
750            String buf;
751
752            if( !m_allowHTML )
753            {
754                buf = escapeHTMLEntities(m_plainTextBuf.toString());
755            }
756            else
757            {
758                buf = m_plainTextBuf.toString();
759            }
760            //
761            //  We must first empty the buffer because the side effect of
762            //  calling makeCamelCaseLink() is to call this routine.
763            //
764
765            m_plainTextBuf = new StringBuilder(20);
766
767            try
768            {
769                //
770                //  This is the heaviest part of parsing, and therefore we can
771                //  do some optimization here.
772                //
773                //  1) Only when the length of the buffer is big enough, we try to do the match
774                //
775
776                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
777                {
778                    // System.out.println("Buffer="+buf);
779
780                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
781                    {
782                        MatchResult result = m_camelCaseMatcher.getMatch();
783
784                        String firstPart = buf.substring(0,result.beginOffset(0));
785                        String prefix = result.group(1);
786
787                        if( prefix == null ) prefix = "";
788
789                        String camelCase = result.group(2);
790                        String protocol  = result.group(3);
791                        String uri       = protocol+result.group(4);
792                        buf              = buf.substring(result.endOffset(0));
793
794                        m_currentElement.addContent( firstPart );
795
796                        //
797                        //  Check if the user does not wish to do URL or WikiWord expansion
798                        //
799                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
800                        {
801                            if( prefix.endsWith("~") )
802                            {
803                                if( m_wysiwygEditorMode )
804                                {
805                                    m_currentElement.addContent( "~" );
806                                }
807                                prefix = prefix.substring(0,prefix.length()-1);
808                            }
809                            if( camelCase != null )
810                            {
811                                m_currentElement.addContent( prefix+camelCase );
812                            }
813                            else if( protocol != null )
814                            {
815                                m_currentElement.addContent( prefix+uri );
816                            }
817                            continue;
818                        }
819
820                        //
821                        //  Fine, then let's check what kind of a link this was
822                        //  and emit the proper elements
823                        //
824                        if( protocol != null )
825                        {
826                            char c = uri.charAt(uri.length()-1);
827                            if( c == '.' || c == ',' )
828                            {
829                                uri = uri.substring(0,uri.length()-1);
830                                buf = c + buf;
831                            }
832                            // System.out.println("URI match "+uri);
833                            m_currentElement.addContent( prefix );
834                            makeDirectURILink( uri );
835                        }
836                        else
837                        {
838                            // System.out.println("Matched: '"+camelCase+"'");
839                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
840                            // System.out.println("prefix="+prefix);
841                            m_currentElement.addContent( prefix );
842
843                            makeCamelCaseLink( camelCase );
844                        }
845                    }
846
847                    m_currentElement.addContent( buf );
848                }
849                else
850                {
851                    //
852                    //  No camelcase asked for, just add the elements
853                    //
854                    m_currentElement.addContent( buf );
855                }
856            }
857            catch( IllegalDataException e )
858            {
859                //
860                // Sometimes it's possible that illegal XML chars is added to the data.
861                // Here we make sure it does not stop parsing.
862                //
863                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
864            }
865        }
866
867        return numChars;
868    }
869
870    /**
871     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
872     *  entities that are already escaped).
873     *
874     *  @param buf
875     *  @return An escaped string.
876     */
877    private String escapeHTMLEntities(String buf)
878    {
879        StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
880
881        for( int i = 0; i < buf.length(); i++ )
882        {
883            char ch = buf.charAt(i);
884
885            if( ch == '<' )
886            {
887                tmpBuf.append("&lt;");
888            }
889            else if( ch == '>' )
890            {
891                tmpBuf.append("&gt;");
892            }
893            else if( ch == '\"' )
894            {
895                tmpBuf.append("&quot;");
896            }
897            else if( ch == '&' )
898            {
899                //
900                //  If the following is an XML entity reference (&#.*;) we'll
901                //  leave it as it is; otherwise we'll replace it with an &amp;
902                //
903
904                boolean isEntity = false;
905                StringBuilder entityBuf = new StringBuilder();
906
907                if( i < buf.length() -1 )
908                {
909                    for( int j = i; j < buf.length(); j++ )
910                    {
911                        char ch2 = buf.charAt(j);
912
913                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
914                        {
915                            entityBuf.append(ch2);
916
917                            if( ch2 == ';' )
918                            {
919                                isEntity = true;
920                                break;
921                            }
922                        }
923                        else
924                        {
925                            break;
926                        }
927                    }
928                }
929
930                if( isEntity )
931                {
932                    tmpBuf.append( entityBuf );
933                    i = i + entityBuf.length() - 1;
934                }
935                else
936                {
937                    tmpBuf.append("&amp;");
938                }
939
940            }
941            else
942            {
943                tmpBuf.append( ch );
944            }
945        }
946
947        return tmpBuf.toString();
948    }
949
950    private Element pushElement( Element e )
951    {
952        flushPlainText();
953        m_currentElement.addContent( e );
954        m_currentElement = e;
955
956        return e;
957    }
958
959    private Element addElement( Content e )
960    {
961        if( e != null )
962        {
963            flushPlainText();
964            m_currentElement.addContent( e );
965        }
966        return m_currentElement;
967    }
968
    /**
     *  All elements that can be empty by the HTML DTD.  popElement() looks
     *  element names up in this array with Arrays.binarySearch(), which
     *  requires the entries to be in ascending order.
     */
    //  Keep sorted.
    private static final String[] EMPTY_ELEMENTS = {
        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
    };
976
977    /**
978     *  Goes through the current element stack and pops all elements until this
979     *  element is found - this essentially "closes" and element.
980     *
981     *  @param s
982     *  @return The new current element, or null, if there was no such element in the entire stack.
983     */
984    private Element popElement( String s )
985    {
986        int flushedBytes = flushPlainText();
987
988        Element currEl = m_currentElement;
989
990        while( currEl.getParentElement() != null )
991        {
992            if( currEl.getName().equals(s) && !currEl.isRootElement() )
993            {
994                m_currentElement = currEl.getParentElement();
995
996                //
997                //  Check if it's okay for this element to be empty.  Then we will
998                //  trick the JDOM generator into not generating an empty element,
999                //  by putting an empty string between the tags.  Yes, it's a kludge
1000                //  but what'cha gonna do about it. :-)
1001                //
1002
1003                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
1004                {
1005                    currEl.addContent("");
1006                }
1007
1008                return m_currentElement;
1009            }
1010
1011            currEl = currEl.getParentElement();
1012        }
1013
1014        return null;
1015    }
1016
1017
1018    /**
1019     *  Reads the stream until it meets one of the specified
1020     *  ending characters, or stream end.  The ending character will be left
1021     *  in the stream.
1022     */
1023    private String readUntil( String endChars )
1024        throws IOException
1025    {
1026        StringBuilder sb = new StringBuilder( 80 );
1027        int ch = nextToken();
1028
1029        while( ch != -1 )
1030        {
1031            if( ch == '\\' )
1032            {
1033                ch = nextToken();
1034                if( ch == -1 )
1035                {
1036                    break;
1037                }
1038            }
1039            else
1040            {
1041                if( endChars.indexOf((char)ch) != -1 )
1042                {
1043                    pushBack( ch );
1044                    break;
1045                }
1046            }
1047            sb.append( (char) ch );
1048            ch = nextToken();
1049        }
1050
1051        return sb.toString();
1052    }
1053
1054    /**
1055     *  Reads the stream while the characters that have been specified are
1056     *  in the stream, returning then the result as a String.
1057     */
1058    private String readWhile( String endChars )
1059        throws IOException
1060    {
1061        StringBuilder sb = new StringBuilder( 80 );
1062        int ch = nextToken();
1063
1064        while( ch != -1 )
1065        {
1066            if( endChars.indexOf((char)ch) == -1 )
1067            {
1068                pushBack( ch );
1069                break;
1070            }
1071
1072            sb.append( (char) ch );
1073            ch = nextToken();
1074        }
1075
1076        return sb.toString();
1077    }
1078
1079    private JSPWikiMarkupParser m_cleanTranslator;
1080
1081    /**
1082     *  Does a lazy init.  Otherwise, we would get into a situation
1083     *  where HTMLRenderer would try and boot a TranslatorReader before
1084     *  the TranslatorReader it is contained by is up.
1085     */
1086    private JSPWikiMarkupParser getCleanTranslator()
1087    {
1088        if( m_cleanTranslator == null )
1089        {
1090            WikiContext dummyContext = new WikiContext( m_engine,
1091                                                        m_context.getHttpRequest(),
1092                                                        m_context.getPage() );
1093            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
1094
1095            m_cleanTranslator.m_allowHTML = true;
1096        }
1097
1098        return m_cleanTranslator;
1099    }
1100    /**
1101     *  Modifies the "hd" parameter to contain proper values.  Because
1102     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
1103     *  % after url encoding with '_'.
1104     *  <p>
1105     *  Counts also duplicate headings (= headings with similar name), and
1106     *  attaches a counter.
1107     */
1108    private String makeHeadingAnchor( String baseName, String title, Heading hd )
1109    {
1110        hd.m_titleText = title;
1111        title = MarkupParser.wikifyLink( title );
1112
1113        hd.m_titleSection = m_engine.encodeName(title);
1114
1115        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) )
1116        {
1117            Integer count = m_titleSectionCounter.get( hd.m_titleSection );
1118            count = count + 1;
1119            m_titleSectionCounter.put( hd.m_titleSection, count );
1120            hd.m_titleSection += "-" + count;
1121        }
1122        else
1123        {
1124            m_titleSectionCounter.put( hd.m_titleSection, 1 );
1125        }
1126
1127        hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+
1128                           "-"+hd.m_titleSection;
1129        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
1130        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
1131
1132        return hd.m_titleAnchor;
1133    }
1134
1135    private String makeSectionTitle( String title )
1136    {
1137        title = title.trim();
1138        String outTitle;
1139
1140        try
1141        {
1142            JSPWikiMarkupParser dtr = getCleanTranslator();
1143            dtr.setInputReader( new StringReader(title) );
1144
1145            CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse());
1146
1147            outTitle = ctt.getString();
1148        }
1149        catch( IOException e )
1150        {
1151            log.fatal("CleanTranslator not working", e);
1152            throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() );
1153        }
1154
1155        return outTitle;
1156    }
1157
1158    /**
1159     *  Returns XHTML for the heading.
1160     *
1161     *  @param level The level of the heading.  @see Heading
1162     *  @param title the title for the heading
1163     *  @param hd a List to which heading should be added
1164     *  @return An Element containing the heading
1165     */
1166    public Element makeHeading( int level, String title, Heading hd )
1167    {
1168        Element el = null;
1169
1170        String pageName = m_context.getPage().getName();
1171
1172        String outTitle = makeSectionTitle( title );
1173
1174        hd.m_level = level;
1175
1176        switch( level )
1177        {
1178          case Heading.HEADING_SMALL:
1179            el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1180            break;
1181
1182          case Heading.HEADING_MEDIUM:
1183            el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1184            break;
1185
1186          case Heading.HEADING_LARGE:
1187            el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
1188            break;
1189
1190          default:
1191            throw new InternalWikiException("Illegal heading type "+level);
1192        }
1193
1194
1195        return el;
1196    }
1197
1198    /**
1199     *  When given a link to a WikiName, we just return
1200     *  a proper HTML link for it.  The local link mutator
1201     *  chain is also called.
1202     */
1203    private Element makeCamelCaseLink( String wikiname )
1204    {
1205        String matchedLink;
1206
1207        callMutatorChain( m_localLinkMutatorChain, wikiname );
1208
1209        if( (matchedLink = linkExists( wikiname )) != null )
1210        {
1211            makeLink( READ, matchedLink, wikiname, null, null );
1212        }
1213        else
1214        {
1215            makeLink( EDIT, wikiname, wikiname, null, null );
1216        }
1217
1218        return m_currentElement;
1219    }
1220
1221    /** Holds the image URL for the duration of this parser */
1222    private String m_outlinkImageURL = null;
1223
1224    /**
1225     *  Returns an element for the external link image (out.png).  However,
1226     *  this method caches the URL for the lifetime of this MarkupParser,
1227     *  because it's commonly used, and we'll end up with possibly hundreds
1228     *  our thousands of references to it...  It's a lot faster, too.
1229     *
1230     *  @return  An element containing the HTML for the outlink image.
1231     */
1232    private Element outlinkImage()
1233    {
1234        Element el = null;
1235
1236        if( m_useOutlinkImage )
1237        {
1238            if( m_outlinkImageURL == null )
1239            {
1240                m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE );
1241            }
1242
1243            el = new Element("img").setAttribute("class", "outlink");
1244            el.setAttribute( "src", m_outlinkImageURL );
1245            el.setAttribute("alt","");
1246        }
1247
1248        return el;
1249    }
1250
1251    /**
1252     *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
1253     *  because of the way that flushPlainText() works, it already encodes
1254     *  all of the XML entities.  But so does WikiContext.getURL(), so we
1255     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
1256     *  <p>
1257     *  What a crappy problem.
1258     *
1259     * @param url
1260     * @return An anchor Element containing the link.
1261     */
1262    private Element makeDirectURILink( String url )
1263    {
1264        Element result;
1265        String last = null;
1266
1267        if( url.endsWith(",") || url.endsWith(".") )
1268        {
1269            last = url.substring( url.length()-1 );
1270            url  = url.substring( 0, url.length()-1 );
1271        }
1272
1273        callMutatorChain( m_externalLinkMutatorChain, url );
1274
1275        if( isImageLink( url ) )
1276        {
1277            result = handleImageLink( StringUtils.replace(url,"&amp;","&"), url, false );
1278        }
1279        else
1280        {
1281            result = makeLink( EXTERNAL, StringUtils.replace(url,"&amp;","&"), url, null, null );
1282            addElement( outlinkImage() );
1283        }
1284
1285        if( last != null )
1286        {
1287            m_plainTextBuf.append(last);
1288        }
1289
1290        return result;
1291    }
1292
1293    /**
1294     *  Image links are handled differently:
1295     *  1. If the text is a WikiName of an existing page,
1296     *     it gets linked.
1297     *  2. If the text is an external link, then it is inlined.
1298     *  3. Otherwise it becomes an ALT text.
1299     *
1300     *  @param reallink The link to the image.
1301     *  @param link     Link text portion, may be a link to somewhere else.
1302     *  @param hasLinkText If true, then the defined link had a link text available.
1303     *                  This means that the link text may be a link to a wiki page,
1304     *                  or an external resource.
1305     */
1306
1307    // FIXME: isExternalLink() is called twice.
1308    private Element handleImageLink( String reallink, String link, boolean hasLinkText )
1309    {
1310        String possiblePage = MarkupParser.cleanLink( link );
1311
1312        if( isExternalLink( link ) && hasLinkText )
1313        {
1314            return makeLink( IMAGELINK, reallink, link, null, null );
1315        }
1316        else if( ( linkExists( possiblePage ) ) != null &&
1317                 hasLinkText )
1318        {
1319            // System.out.println("Orig="+link+", Matched: "+matchedLink);
1320            callMutatorChain( m_localLinkMutatorChain, possiblePage );
1321
1322            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
1323        }
1324        else
1325        {
1326            return makeLink( IMAGE, reallink, link, null, null );
1327        }
1328    }
1329
1330    private Element handleAccessRule( String ruleLine )
1331    {
1332        if( m_wysiwygEditorMode )
1333        {
1334            m_currentElement.addContent( "[" + ruleLine + "]" );
1335        }
1336
1337        if( !m_parseAccessRules ) return m_currentElement;
1338        Acl acl;
1339        WikiPage          page = m_context.getRealPage();
1340        // UserDatabase      db = m_context.getEngine().getUserDatabase();
1341
1342        if( ruleLine.startsWith( "{" ) )
1343            ruleLine = ruleLine.substring( 1 );
1344        if( ruleLine.endsWith( "}" ) )
1345            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1346
1347        if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine);
1348
1349        try
1350        {
1351            acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1352
1353            page.setAcl( acl );
1354
1355            if( log.isDebugEnabled() ) log.debug( acl.toString() );
1356        }
1357        catch( WikiSecurityException wse )
1358        {
1359            return makeError( wse.getMessage() );
1360        }
1361
1362        return m_currentElement;
1363    }
1364
1365    /**
1366     *  Handles metadata setting [{SET foo=bar}]
1367     */
1368    private Element handleMetadata( String link )
1369    {
1370        if( m_wysiwygEditorMode )
1371        {
1372            m_currentElement.addContent( "[" + link + "]" );
1373        }
1374
1375        try
1376        {
1377            String args = link.substring( link.indexOf(' '), link.length()-1 );
1378
1379            String name = args.substring( 0, args.indexOf('=') );
1380            String val  = args.substring( args.indexOf('=')+1, args.length() );
1381
1382            name = name.trim();
1383            val  = val.trim();
1384
1385            if( val.startsWith("'") ) val = val.substring( 1 );
1386            if( val.endsWith("'") )   val = val.substring( 0, val.length()-1 );
1387
1388            // log.debug("SET name='"+name+"', value='"+val+"'.");
1389
1390            if( name.length() > 0 && val.length() > 0 )
1391            {
1392                val = m_engine.getVariableManager().expandVariables( m_context,
1393                                                                     val );
1394
1395                m_context.getPage().setAttribute( name, val );
1396            }
1397        }
1398        catch( Exception e )
1399        {
1400            ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1401            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1402        }
1403
1404        return m_currentElement;
1405    }
1406
1407    /**
1408     *  Emits a processing instruction that will disable markup escaping. This is
1409     *  very useful if you want to emit HTML directly into the stream.
1410     *
1411     */
1412    private void disableOutputEscaping()
1413    {
1414        addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") );
1415    }
1416
1417    /**
1418     *  Gobbles up all hyperlinks that are encased in square brackets.
1419     */
1420    private Element handleHyperlinks( String linktext, int pos )
1421    {
1422        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1423
1424        StringBuilder sb = new StringBuilder(linktext.length()+80);
1425
1426        if( isAccessRule( linktext ) )
1427        {
1428            return handleAccessRule( linktext );
1429        }
1430
1431        if( isMetadata( linktext ) )
1432        {
1433            return handleMetadata( linktext );
1434        }
1435
1436        if( isPluginLink( linktext ) )
1437        {
1438            try
1439            {
1440                PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1441                //
1442                //  This might sometimes fail, especially if there is something which looks
1443                //  like a plugin invocation but is really not.
1444                //
1445                if( pluginContent != null )
1446                {
1447                    addElement( pluginContent );
1448
1449                    pluginContent.executeParse( m_context );
1450                }
1451            }
1452            catch( PluginException e )
1453            {
1454                log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
1455                //log.info( "Root cause:",e.getRootThrowable() );
1456                if( !m_wysiwygEditorMode )
1457                {
1458                    ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE );
1459                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 
1460                                                                        m_context.getRealPage().getWiki(), 
1461                                                                        m_context.getRealPage().getName(), 
1462                                                                        e.getMessage() ) ) );
1463                }
1464            }
1465
1466            return m_currentElement;
1467        }
1468
1469        try
1470        {
1471            LinkParser.Link link = m_linkParser.parse(linktext);
1472            linktext       = link.getText();
1473            String linkref = link.getReference();
1474
1475            //
1476            //  Yes, we now have the components separated.
1477            //  linktext = the text the link should have
1478            //  linkref  = the url or page name.
1479            //
1480            //  In many cases these are the same.  [linktext|linkref].
1481            //
1482            if( VariableManager.isVariableLink( linktext ) )
1483            {
1484                Content el = new VariableContent(linktext);
1485
1486                addElement( el );
1487            }
1488            else if( isExternalLink( linkref ) )
1489            {
1490                // It's an external link, out of this Wiki
1491
1492                callMutatorChain( m_externalLinkMutatorChain, linkref );
1493
1494                if( isImageLink( linkref ) )
1495                {
1496                    handleImageLink( linkref, linktext, link.hasReference() );
1497                }
1498                else
1499                {
1500                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1501                    addElement( outlinkImage() );
1502                }
1503            }
1504            else if( link.isInterwikiLink() )
1505            {
1506                // It's an interwiki link
1507                // InterWiki links also get added to external link chain
1508                // after the links have been resolved.
1509
1510                // FIXME: There is an interesting issue here:  We probably should
1511                //        URLEncode the wikiPage, but we can't since some of the
1512                //        Wikis use slashes (/), which won't survive URLEncoding.
1513                //        Besides, we don't know which character set the other Wiki
1514                //        is using, so you'll have to write the entire name as it appears
1515                //        in the URL.  Bugger.
1516
1517                String extWiki  = link.getExternalWiki();
1518                String wikiPage = link.getExternalWikiPage();
1519
1520                if( m_wysiwygEditorMode )
1521                {
1522                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1523                }
1524                else
1525                {
1526                    String urlReference = m_engine.getInterWikiURL( extWiki );
1527
1528                    if( urlReference != null )
1529                    {
1530                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1531                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1532
1533                        if( isImageLink(urlReference) )
1534                        {
1535                            handleImageLink( urlReference, linktext, link.hasReference() );
1536                        }
1537                        else
1538                        {
1539                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1540                        }
1541
1542                        if( isExternalLink(urlReference) )
1543                        {
1544                            addElement( outlinkImage() );
1545                        }
1546                    }
1547                    else
1548                    {
1549                        Object[] args = { extWiki };
1550                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1551                    }
1552                }
1553            }
1554            else if( linkref.startsWith("#") )
1555            {
1556                // It defines a local footnote
1557                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1558            }
1559            else if( TextUtil.isNumber( linkref ) )
1560            {
1561                // It defines a reference to a local footnote
1562                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1563            }
1564            else
1565            {
1566                int hashMark = -1;
1567
1568                //
1569                //  Internal wiki link, but is it an attachment link?
1570                //
1571                String attachment = findAttachment( linkref );
1572                if( attachment != null )
1573                {
1574                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1575
1576                    if( isImageLink( linkref ) )
1577                    {
1578                        attachment = m_context.getURL( WikiContext.ATTACH, attachment );
1579                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1580                    }
1581                    else
1582                    {
1583                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1584                    }
1585                }
1586                else if( (hashMark = linkref.indexOf('#')) != -1 )
1587                {
1588                    // It's an internal Wiki link, but to a named section
1589
1590                    String namedSection = linkref.substring( hashMark+1 );
1591                    linkref = linkref.substring( 0, hashMark );
1592
1593                    linkref = MarkupParser.cleanLink( linkref );
1594
1595                    callMutatorChain( m_localLinkMutatorChain, linkref );
1596
1597                    String matchedLink;
1598                    if( (matchedLink = linkExists( linkref )) != null )
1599                    {
1600                        String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection));
1601                        sectref = sectref.replace('%', '_');
1602                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1603                    }
1604                    else
1605                    {
1606                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1607                    }
1608                }
1609                else
1610                {
1611                    // It's an internal Wiki link
1612                    linkref = MarkupParser.cleanLink( linkref );
1613
1614                    callMutatorChain( m_localLinkMutatorChain, linkref );
1615
1616                    String matchedLink = linkExists( linkref );
1617
1618                    if( matchedLink != null )
1619                    {
1620                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1621                    }
1622                    else
1623                    {
1624                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1625                    }
1626                }
1627            }
1628        }
1629        catch( ParseException e )
1630        {
1631            log.info("Parser failure: ",e);
1632            Object[] args = { e.getMessage() };
1633            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1634        }
1635
1636        return m_currentElement;
1637    }
1638
1639    private String findAttachment( String linktext )
1640    {
1641        AttachmentManager mgr = m_engine.getAttachmentManager();
1642        Attachment att = null;
1643
1644        try
1645        {
1646            att = mgr.getAttachmentInfo( m_context, linktext );
1647        }
1648        catch( ProviderException e )
1649        {
1650            log.warn("Finding attachments failed: ",e);
1651            return null;
1652        }
1653
1654        if( att != null )
1655        {
1656            return att.getName();
1657        }
1658        else if( linktext.indexOf('/') != -1 )
1659        {
1660            return linktext;
1661        }
1662
1663        return null;
1664    }
1665
1666    /**
1667     *  Pushes back any string that has been read.  It will obviously
1668     *  be pushed back in a reverse order.
1669     *
1670     *  @since 2.1.77
1671     */
1672    private void pushBack( String s )
1673        throws IOException
1674    {
1675        for( int i = s.length()-1; i >= 0; i-- )
1676        {
1677            pushBack( s.charAt(i) );
1678        }
1679    }
1680
1681    private Element handleBackslash()
1682        throws IOException
1683    {
1684        int ch = nextToken();
1685
1686        if( ch == '\\' )
1687        {
1688            int ch2 = nextToken();
1689
1690            if( ch2 == '\\' )
1691            {
1692                pushElement( new Element("br").setAttribute("clear","all"));
1693                return popElement("br");
1694            }
1695
1696            pushBack( ch2 );
1697
1698            pushElement( new Element("br") );
1699            return popElement("br");
1700        }
1701
1702        pushBack( ch );
1703
1704        return null;
1705    }
1706
1707    private Element handleUnderscore()
1708        throws IOException
1709    {
1710        int ch = nextToken();
1711        Element el = null;
1712
1713        if( ch == '_' )
1714        {
1715            if( m_isbold )
1716            {
1717                el = popElement("b");
1718            }
1719            else
1720            {
1721                el = pushElement( new Element("b") );
1722            }
1723            m_isbold = !m_isbold;
1724        }
1725        else
1726        {
1727            pushBack( ch );
1728        }
1729
1730        return el;
1731    }
1732
1733
1734    /**
1735     *  For example: italics.
1736     */
1737    private Element handleApostrophe()
1738        throws IOException
1739    {
1740        int ch = nextToken();
1741        Element el = null;
1742
1743        if( ch == '\'' )
1744        {
1745            if( m_isitalic )
1746            {
1747                el = popElement("i");
1748            }
1749            else
1750            {
1751                el = pushElement( new Element("i") );
1752            }
1753            m_isitalic = !m_isitalic;
1754        }
1755        else
1756        {
1757            pushBack( ch );
1758        }
1759
1760        return el;
1761    }
1762
1763    private Element handleOpenbrace( boolean isBlock )
1764        throws IOException
1765    {
1766        int ch = nextToken();
1767
1768        if( ch == '{' )
1769        {
1770            int ch2 = nextToken();
1771
1772            if( ch2 == '{' )
1773            {
1774                m_isPre = true;
1775                m_isEscaping = true;
1776                m_isPreBlock = isBlock;
1777
1778                if( isBlock )
1779                {
1780                    startBlockLevel();
1781                    return pushElement( new Element("pre") );
1782                }
1783
1784                return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") );
1785            }
1786
1787            pushBack( ch2 );
1788
1789            return pushElement( new Element("tt") );
1790        }
1791
1792        pushBack( ch );
1793
1794        return null;
1795    }
1796
1797    /**
1798     *  Handles both }} and }}}
1799     */
1800    private Element handleClosebrace()
1801        throws IOException
1802    {
1803        int ch2 = nextToken();
1804
1805        if( ch2 == '}' )
1806        {
1807            int ch3 = nextToken();
1808
1809            if( ch3 == '}' )
1810            {
1811                if( m_isPre )
1812                {
1813                    if( m_isPreBlock )
1814                    {
1815                        popElement( "pre" );
1816                    }
1817                    else
1818                    {
1819                        popElement( "span" );
1820                    }
1821
1822                    m_isPre = false;
1823                    m_isEscaping = false;
1824                    return m_currentElement;
1825                }
1826
1827                m_plainTextBuf.append("}}}");
1828                return m_currentElement;
1829            }
1830
1831            pushBack( ch3 );
1832
1833            if( !m_isEscaping )
1834            {
1835                return popElement("tt");
1836            }
1837        }
1838
1839        pushBack( ch2 );
1840
1841        return null;
1842    }
1843
1844    private Element handleDash()
1845        throws IOException
1846    {
1847        int ch = nextToken();
1848
1849        if( ch == '-' )
1850        {
1851            int ch2 = nextToken();
1852
1853            if( ch2 == '-' )
1854            {
1855                int ch3 = nextToken();
1856
1857                if( ch3 == '-' )
1858                {
1859                    // Empty away all the rest of the dashes.
1860                    // Do not forget to return the first non-match back.
1861                    do
1862                    {
1863                        ch = nextToken();
1864                    }
1865                    while ( ch == '-' );
1866
1867                    pushBack(ch);
1868                    startBlockLevel();
1869                    pushElement( new Element("hr") );
1870                    return popElement( "hr" );
1871                }
1872
1873                pushBack( ch3 );
1874            }
1875            pushBack( ch2 );
1876        }
1877
1878        pushBack( ch );
1879
1880        return null;
1881    }
1882
1883    private Element handleHeading()
1884        throws IOException
1885    {
1886        Element el = null;
1887
1888        int ch  = nextToken();
1889
1890        Heading hd = new Heading();
1891
1892        if( ch == '!' )
1893        {
1894            int ch2 = nextToken();
1895
1896            if( ch2 == '!' )
1897            {
1898                String title = peekAheadLine();
1899
1900                el = makeHeading( Heading.HEADING_LARGE, title, hd);
1901            }
1902            else
1903            {
1904                pushBack( ch2 );
1905                String title = peekAheadLine();
1906                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1907            }
1908        }
1909        else
1910        {
1911            pushBack( ch );
1912            String title = peekAheadLine();
1913            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1914        }
1915
1916        callHeadingListenerChain( hd );
1917
1918        m_lastHeading = hd;
1919
1920        if( el != null ) pushElement(el);
1921
1922        return el;
1923    }
1924
1925    /**
1926     *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1927     *  EOL from the stream.
1928     */
1929    private StringBuilder readUntilEOL()
1930        throws IOException
1931    {
1932        int ch;
1933        StringBuilder buf = new StringBuilder( 256 );
1934
1935        while( true )
1936        {
1937            ch = nextToken();
1938
1939            if( ch == -1 )
1940                break;
1941
1942            buf.append( (char) ch );
1943
1944            if( ch == '\n' )
1945                break;
1946        }
1947        return buf;
1948    }
1949
    /**
     *  Controls whether italic is restarted after a paragraph shift.
     *  startBlockLevel() stores the italic state here before clearing it,
     *  and parseToken() re-opens the "i" element when it starts a new
     *  paragraph while this is set.
     */
    private boolean m_restartitalic = false;

    /**
     *  Controls whether bold is restarted after a paragraph shift; the
     *  counterpart of m_restartitalic for the "b" element.
     */
    private boolean m_restartbold   = false;

    /**
     *  Tracks whether the parser is at the start of a line.  fillBuffer()
     *  sets it when parsing starts and clears it once a character or an
     *  element has been handled; parseToken() sets it again when a newline
     *  ends a line that had content.
     */
    private boolean m_newLine;
1956
1957    /**
1958     *  Starts a block level element, therefore closing
1959     *  a potential open paragraph tag.
1960     */
1961    private void startBlockLevel()
1962    {
1963        // These may not continue over block level limits in XHTML
1964
1965        popElement("i");
1966        popElement("b");
1967        popElement("tt");
1968
1969        if( m_isOpenParagraph )
1970        {
1971            m_isOpenParagraph = false;
1972            popElement("p");
1973            m_plainTextBuf.append("\n"); // Just small beautification
1974        }
1975
1976        m_restartitalic = m_isitalic;
1977        m_restartbold   = m_isbold;
1978
1979        m_isitalic = false;
1980        m_isbold   = false;
1981    }
1982
1983    private static String getListType( char c )
1984    {
1985        if( c == '*' )
1986        {
1987            return "ul";
1988        }
1989        else if( c == '#' )
1990        {
1991            return "ol";
1992        }
1993        throw new InternalWikiException("Parser got faulty list type: "+c);
1994    }
    // FIXME: Refactor this; it's a bit messy.

    /**
     *  Like original handleOrderedList() and handleUnorderedList()
     *  however handles both ordered ('#') and unordered ('*') mixed together.
     *  <p>
     *  Reads the bullet string at the start of the line, compares it with
     *  the bullet string of the previous list line (kept in
     *  m_genlistBulletBuffer) and opens or closes list elements so that the
     *  nesting matches the new bullet string.  Finally a new "li" element is
     *  opened and the new bullet string is remembered.
     *
     *  @return The current element after the new list item has been pushed.
     *  @throws IOException If reading from the stream fails.
     */
    private Element handleGeneralList()
        throws IOException
    {
         startBlockLevel();

         String strBullets = readWhile( "*#" );
         int numBullets = strBullets.length();

         // override the beginning portion of bullet pattern to be like the previous
         // to simulate PHPWiki style lists

         if(m_allowPHPWikiStyleLists)
         {
             // only substitute if different
             if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
                   (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
             {
                 if(numBullets <= m_genlistlevel)
                 {
                     // Substitute all but the last character (keep the expressed bullet preference)
                     strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
                                   + strBullets.substring(numBullets-1, numBullets);
                 }
                 else
                 {
                     // Deeper than before: take the whole previous pattern and
                     // keep only the newly added bullets from this line.
                     strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
                 }
             }
         }

         //
         //  Check if this is still of the same type, i.e. the new and the
         //  previous bullet strings agree on their common prefix.
         //
         if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
            (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
         {
             if( numBullets > m_genlistlevel )
             {
                 // The first new list goes directly into the current context;
                 // note that m_genlistlevel is incremented as a side effect.
                 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );

                 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
                 {
                     // bullets are growing, get from new bullet list;
                     // each further list is wrapped inside its own list item
                     pushElement( new Element("li") );
                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
                 }
             }
             else if( numBullets < m_genlistlevel )
             {
                 //  Close the previous list item.
                 popElement( "li" );

                 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
                 {
                     // bullets are shrinking, get from old bullet list

                     popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
                     if( m_genlistlevel > 0 )
                     {
                         popElement( "li" );
                     }

                 }
             }
             else
             {
                 // Same depth: just close the previous list item, if any.
                 if( m_genlistlevel > 0 )
                 {
                     popElement( "li" );
                 }
             }
         }
         else
         {
             //
             //  The pattern has changed, unwind and restart
             //
             int  numEqualBullets;
             int  numCheckBullets;

             // find out how much is the same
             numEqualBullets = 0;
             numCheckBullets = Math.min(numBullets,m_genlistlevel);

             while( numEqualBullets < numCheckBullets )
             {
                 // if the bullets are equal so far, keep going
                 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
                     numEqualBullets++;
                 // otherwise giveup, we have found how many are equal
                 else
                     break;
             }

             //unwind: close the old lists down to the first differing level
             for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
             {
                 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
                 if( m_genlistlevel > numBullets )
                 {
                     popElement("li");
                 }
             }

             //rewind: open the new lists from the first differing level on

             pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
             for(int i = numEqualBullets; i < numBullets; i++)
             {
                 pushElement( new Element("li") );
                 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
             }
             m_genlistlevel = numBullets;
         }

         //
         //  Push a new list item, and eat away any extra whitespace
         //
         pushElement( new Element("li") );
         readWhile(" ");

         // work done, remember the new bullet list (in place of old one)
         m_genlistBulletBuffer.setLength(0);
         m_genlistBulletBuffer.append(strBullets);

         return m_currentElement;
    }
2131
2132    private Element unwindGeneralList()
2133    {
2134        //unwind
2135        for( ; m_genlistlevel > 0; m_genlistlevel-- )
2136        {
2137            popElement( "li" );
2138            popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
2139        }
2140
2141        m_genlistBulletBuffer.setLength(0);
2142
2143        return null;
2144    }
2145
2146
2147    private Element handleDefinitionList()
2148        throws IOException
2149    {
2150        if( !m_isdefinition )
2151        {
2152            m_isdefinition = true;
2153
2154            startBlockLevel();
2155
2156            pushElement( new Element("dl") );
2157            return pushElement( new Element("dt") );
2158        }
2159
2160        return null;
2161    }
2162
2163    private Element handleOpenbracket()
2164        throws IOException
2165    {
2166        StringBuilder sb = new StringBuilder(40);
2167        int pos = getPosition();
2168        int ch = nextToken();
2169        boolean isPlugin = false;
2170
2171        if( ch == '[' )
2172        {
2173            if( m_wysiwygEditorMode )
2174            {
2175                sb.append( '[' );
2176            }
2177
2178            sb.append( (char)ch );
2179
2180            while( (ch = nextToken()) == '[' )
2181            {
2182                sb.append( (char)ch );
2183            }
2184        }
2185
2186
2187        if( ch == '{' )
2188        {
2189            isPlugin = true;
2190        }
2191
2192        pushBack( ch );
2193
2194        if( sb.length() > 0 )
2195        {
2196            m_plainTextBuf.append( sb );
2197            return m_currentElement;
2198        }
2199
2200        //
2201        //  Find end of hyperlink
2202        //
2203
2204        ch = nextToken();
2205        int nesting = 1;    // Check for nested plugins
2206
2207        while( ch != -1 )
2208        {
2209            int ch2 = nextToken(); pushBack(ch2);
2210
2211            if( isPlugin )
2212            {
2213                if( ch == '[' && ch2 == '{' )
2214                {
2215                    nesting++;
2216                }
2217                else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
2218                {
2219                    break;
2220                }
2221                else if( ch == '}' && ch2 == ']' )
2222                {
2223                    // NB: This will be decremented once at the end
2224                    nesting--;
2225                }
2226            }
2227            else
2228            {
2229                if( ch == ']' )
2230                {
2231                    break;
2232                }
2233            }
2234
2235            sb.append( (char) ch );
2236
2237            ch = nextToken();
2238        }
2239
2240        //
2241        //  If the link is never finished, do some tricks to display the rest of the line
2242        //  unchanged.
2243        //
2244        if( ch == -1 )
2245        {
2246            log.debug("Warning: unterminated link detected!");
2247            m_isEscaping = true;
2248            m_plainTextBuf.append( sb );
2249            flushPlainText();
2250            m_isEscaping = false;
2251            return m_currentElement;
2252        }
2253
2254        return handleHyperlinks( sb.toString(), pos );
2255    }
2256
2257    /**
2258     *  Reads the stream until the current brace is closed or stream end.
2259     */
2260    private String readBraceContent( char opening, char closing )
2261        throws IOException
2262    {
2263        StringBuilder sb = new StringBuilder(40);
2264        int braceLevel = 1;
2265        int ch;
2266        while(( ch = nextToken() ) != -1 )
2267        {
2268            if( ch == '\\' )
2269            {
2270                continue;
2271            }
2272            else if ( ch == opening )
2273            {
2274                braceLevel++;
2275            }
2276            else if ( ch == closing )
2277            {
2278                braceLevel--;
2279                if (braceLevel==0)
2280                {
2281                  break;
2282                }
2283            }
2284            sb.append( (char)ch );
2285        }
2286        return sb.toString();
2287    }
2288
2289
2290    /**
2291     *  Handles constructs of type %%(style) and %%class
2292     * @param newLine
2293     * @return An Element containing the div or span, depending on the situation.
2294     * @throws IOException
2295     */
2296    private Element handleDiv( boolean newLine )
2297        throws IOException
2298    {
2299        int ch = nextToken();
2300        Element el = null;
2301
2302        if( ch == '%' )
2303        {
2304            String style = null;
2305            String clazz = null;
2306
2307            ch = nextToken();
2308
2309            //
2310            //  Style or class?
2311            //
2312            if( ch == '(' )
2313            {
2314                style = readBraceContent('(',')');
2315            }
2316            else if( Character.isLetter( (char) ch ) )
2317            {
2318                pushBack( ch );
2319                clazz = readUntil( " \t\n\r" );
2320                ch = nextToken();
2321
2322                //
2323                //  Pop out only spaces, so that the upcoming EOL check does not check the
2324                //  next line.
2325                //
2326                if( ch == '\n' || ch == '\r' )
2327                {
2328                    pushBack(ch);
2329                }
2330            }
2331            else
2332            {
2333                //
2334                // Anything else stops.
2335                //
2336
2337                pushBack(ch);
2338
2339                try
2340                {
2341                    Boolean isSpan = m_styleStack.pop();
2342
2343                    if( isSpan == null )
2344                    {
2345                        // Fail quietly
2346                    }
2347                    else if( isSpan.booleanValue() )
2348                    {
2349                        el = popElement( "span" );
2350                    }
2351                    else
2352                    {
2353                        el = popElement( "div" );
2354                    }
2355                }
2356                catch( EmptyStackException e )
2357                {
2358                    log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
2359                    return m_currentElement;
2360                }
2361
2362                return el;
2363            }
2364
2365            //
2366            //  Check if there is an attempt to do something nasty
2367            //
2368
2369            try
2370            {
2371                style = StringEscapeUtils.unescapeHtml(style);
2372                if( style != null && style.indexOf("javascript:") != -1 )
2373                {
2374                    log.debug("Attempt to output javascript within CSS:"+style);
2375                    ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2376                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
2377                }
2378            }
2379            catch( NumberFormatException e )
2380            {
2381                //
2382                //  If there are unknown entities, we don't want the parser to stop.
2383                //
2384                ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2385                String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
2386                return addElement( makeError( msg ) );
2387            }
2388
2389            //
2390            //  Decide if we should open a div or a span?
2391            //
2392            String eol = peekAheadLine();
2393
2394            if( eol.trim().length() > 0 )
2395            {
2396                // There is stuff after the class
2397
2398                el = new Element("span");
2399
2400                m_styleStack.push( Boolean.TRUE );
2401            }
2402            else
2403            {
2404                startBlockLevel();
2405                el = new Element("div");
2406                m_styleStack.push( Boolean.FALSE );
2407            }
2408
2409            if( style != null ) el.setAttribute("style", style);
2410            if( clazz != null ) el.setAttribute("class", clazz );
2411            el = pushElement( el );
2412
2413            return el;
2414        }
2415
2416        pushBack(ch);
2417
2418        return el;
2419    }
2420
2421    private Element handleSlash( boolean newLine )
2422        throws IOException
2423    {
2424        int ch = nextToken();
2425
2426        pushBack(ch);
2427        if( ch == '%' && !m_styleStack.isEmpty() )
2428        {
2429            return handleDiv( newLine );
2430        }
2431
2432        return null;
2433    }
2434
2435    private Element handleBar( boolean newLine )
2436        throws IOException
2437    {
2438        Element el = null;
2439
2440        if( !m_istable && !newLine )
2441        {
2442            return null;
2443        }
2444
2445        //
2446        //  If the bar is in the first column, we will either start
2447        //  a new table or continue the old one.
2448        //
2449
2450        if( newLine )
2451        {
2452            if( !m_istable )
2453            {
2454                startBlockLevel();
2455                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2456                m_istable = true;
2457                m_rowNum = 0;
2458            }
2459
2460            m_rowNum++;
2461            Element tr = ( m_rowNum % 2 != 0 )
2462                       ? new Element("tr").setAttribute("class", "odd")
2463                       : new Element("tr");
2464            el = pushElement( tr );
2465        }
2466
2467        //
2468        //  Check out which table cell element to start;
2469        //  a header element (th) or a regular element (td).
2470        //
2471        int ch = nextToken();
2472
2473        if( ch == '|' )
2474        {
2475            if( !newLine )
2476            {
2477                el = popElement("th");
2478                if( el == null ) popElement("td");
2479            }
2480            el = pushElement( new Element("th") );
2481        }
2482        else
2483        {
2484            if( !newLine )
2485            {
2486                el = popElement("td");
2487                if( el == null ) popElement("th");
2488            }
2489
2490            el = pushElement( new Element("td") );
2491
2492            pushBack( ch );
2493        }
2494
2495        return el;
2496    }
2497
2498    /**
2499     *  Generic escape of next character or entity.
2500     */
2501    private Element handleTilde()
2502        throws IOException
2503    {
2504        int ch = nextToken();
2505
2506        if( ch == ' ' )
2507        {
2508            if( m_wysiwygEditorMode )
2509            {
2510                m_plainTextBuf.append( "~ " );
2511            }
2512            return m_currentElement;
2513        }
2514
2515        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2516            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2517            ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2518        {
2519            if( m_wysiwygEditorMode )
2520            {
2521                m_plainTextBuf.append( '~' );
2522            }
2523
2524            m_plainTextBuf.append( (char)ch );
2525            m_plainTextBuf.append(readWhile( ""+(char)ch ));
2526            return m_currentElement;
2527        }
2528
2529        // No escape.
2530        pushBack( ch );
2531
2532        return null;
2533    }
2534
2535    private void fillBuffer( Element startElement )
2536        throws IOException
2537    {
2538        m_currentElement = startElement;
2539
2540        boolean quitReading = false;
2541        m_newLine = true;
2542        disableOutputEscaping();
2543
2544        while(!quitReading)
2545        {
2546            int ch = nextToken();
2547
2548            if( ch == -1 ) break;
2549
2550            //
2551            //  Check if we're actually ending the preformatted mode.
2552            //  We still must do an entity transformation here.
2553            //
2554            if( m_isEscaping )
2555            {
2556                if( ch == '}' )
2557                {
2558                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2559                }
2560                else if( ch == -1 )
2561                {
2562                    quitReading = true;
2563                }
2564                else if( ch == '\r' )
2565                {
2566                    // DOS line feeds we ignore.
2567                }
2568                else if( ch == '<' )
2569                {
2570                    m_plainTextBuf.append( "&lt;" );
2571                }
2572                else if( ch == '>' )
2573                {
2574                    m_plainTextBuf.append( "&gt;" );
2575                }
2576                else if( ch == '&' )
2577                {
2578                    m_plainTextBuf.append( "&amp;" );
2579                }
2580                else if( ch == '~' )
2581                {
2582                    String braces = readWhile("}");
2583                    if( braces.length() >= 3 )
2584                    {
2585                        m_plainTextBuf.append("}}}");
2586
2587                        braces = braces.substring(3);
2588                    }
2589                    else
2590                    {
2591                        m_plainTextBuf.append( (char) ch );
2592                    }
2593
2594                    for( int i = braces.length()-1; i >= 0; i-- )
2595                    {
2596                        pushBack(braces.charAt(i));
2597                    }
2598                }
2599                else
2600                {
2601                    m_plainTextBuf.append( (char) ch );
2602                }
2603
2604                continue;
2605            }
2606
2607            //
2608            //  An empty line stops a list
2609            //
2610            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2611            {
2612                m_plainTextBuf.append(unwindGeneralList());
2613            }
2614
2615            if( m_newLine && ch != '|' && m_istable )
2616            {
2617                popElement("table");
2618                m_istable = false;
2619            }
2620
2621            int skip = IGNORE;
2622
2623            //
2624            //  Do the actual parsing and catch any errors.
2625            //
2626            try
2627            {
2628                skip = parseToken( ch );
2629            }
2630            catch( IllegalDataException e )
2631            {
2632                log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2633
2634                makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2635            }
2636
2637            //
2638            //   The idea is as follows:  If the handler method returns
2639            //   an element (el != null), it is assumed that it has been
2640            //   added in the stack.  Otherwise the character is added
2641            //   as is to the plaintext buffer.
2642            //
2643            //   For the transition phase, if s != null, it also gets
2644            //   added in the plaintext buffer.
2645            //
2646
2647            switch( skip )
2648            {
2649                case ELEMENT:
2650                    m_newLine = false;
2651                    break;
2652
2653                case CHARACTER:
2654                    m_plainTextBuf.append( (char) ch );
2655                    m_newLine = false;
2656                    break;
2657
2658                case IGNORE:
2659                default:
2660                    break;
2661            }
2662        }
2663
2664        closeHeadings();
2665        popElement("domroot");
2666    }
2667
2668    private String cleanupSuspectData( String s )
2669    {
2670        StringBuilder sb = new StringBuilder( s.length() );
2671
2672        for( int i = 0; i < s.length(); i++ )
2673        {
2674            char c = s.charAt(i);
2675
2676            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2677            else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2678        }
2679
2680        return sb.toString();
2681    }
2682
    /**
     *  The token is a plain character.  When parseToken() returns this,
     *  fillBuffer() appends the character to the plain text buffer and
     *  the parser is no longer at the start of a line.
     */
    protected static final int CHARACTER = 0;

    /**
     *  The token is a wikimarkup element.  When parseToken() returns this,
     *  fillBuffer() adds nothing to the plain text buffer, but the parser
     *  is no longer at the start of a line.
     */
    protected static final int ELEMENT   = 1;

    /**
     *  The token is to be ignored.  When parseToken() returns this,
     *  fillBuffer() does nothing with the token.
     */
    protected static final int IGNORE    = 2;
2691
2692    /**
2693     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
2694     *  you think this was a wiki markup element, and IGNORE, if you think
2695     *  we should ignore this altogether.
2696     *  <p>
2697     *  To add your own MarkupParser, you can override this method, but it
2698     *  is recommended that you call super.parseToken() as well to gain advantage
2699     *  of JSPWiki's own markup.  You can call it at the start of your own
2700     *  parseToken() or end - it does not matter.
2701     *
2702     * @param ch The character under investigation
2703     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2704     * @throws IOException If parsing fails.
2705     */
2706    protected int parseToken( int ch )
2707        throws IOException
2708    {
2709        Element el = null;
2710
2711        //
2712        //  Now, check the incoming token.
2713        //
2714        switch( ch )
2715        {
2716          case '\r':
2717            // DOS linefeeds we forget
2718            return IGNORE;
2719
2720          case '\n':
2721            //
2722            //  Close things like headings, etc.
2723            //
2724
2725            // FIXME: This is not really very fast
2726
2727            closeHeadings();
2728
2729            popElement("dl"); // Close definition lists.
2730            if( m_istable )
2731            {
2732                popElement("tr");
2733            }
2734
2735            m_isdefinition = false;
2736
2737            if( m_newLine )
2738            {
2739                // Paragraph change.
2740                startBlockLevel();
2741
2742                //
2743                //  Figure out which elements cannot be enclosed inside
2744                //  a <p></p> pair according to XHTML rules.
2745                //
2746                String nextLine = peekAheadLine();
2747                if( nextLine.length() == 0 ||
2748                    (nextLine.length() > 0 &&
2749                     !nextLine.startsWith("{{{") &&
2750                     !nextLine.startsWith("----") &&
2751                     !nextLine.startsWith("%%") &&
2752                     "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2753                {
2754                    pushElement( new Element("p") );
2755                    m_isOpenParagraph = true;
2756
2757                    if( m_restartitalic )
2758                    {
2759                        pushElement( new Element("i") );
2760                        m_isitalic = true;
2761                        m_restartitalic = false;
2762                    }
2763                    if( m_restartbold )
2764                    {
2765                        pushElement( new Element("b") );
2766                        m_isbold = true;
2767                        m_restartbold = false;
2768                    }
2769                }
2770            }
2771            else
2772            {
2773                m_plainTextBuf.append("\n");
2774                m_newLine = true;
2775            }
2776            return IGNORE;
2777
2778
2779          case '\\':
2780            el = handleBackslash();
2781            break;
2782
2783          case '_':
2784            el = handleUnderscore();
2785            break;
2786
2787          case '\'':
2788            el = handleApostrophe();
2789            break;
2790
2791          case '{':
2792            el = handleOpenbrace( m_newLine );
2793            break;
2794
2795          case '}':
2796            el = handleClosebrace();
2797            break;
2798
2799          case '-':
2800            if( m_newLine )
2801                el = handleDash();
2802
2803            break;
2804
2805          case '!':
2806            if( m_newLine )
2807            {
2808                el = handleHeading();
2809            }
2810            break;
2811
2812          case ';':
2813            if( m_newLine )
2814            {
2815                el = handleDefinitionList();
2816            }
2817            break;
2818
2819          case ':':
2820            if( m_isdefinition )
2821            {
2822                popElement("dt");
2823                el = pushElement( new Element("dd") );
2824                m_isdefinition = false;
2825            }
2826            break;
2827
2828          case '[':
2829            el = handleOpenbracket();
2830            break;
2831
2832          case '*':
2833            if( m_newLine )
2834            {
2835                pushBack('*');
2836                el = handleGeneralList();
2837            }
2838            break;
2839
2840          case '#':
2841            if( m_newLine )
2842            {
2843                pushBack('#');
2844                el = handleGeneralList();
2845            }
2846            break;
2847
2848          case '|':
2849            el = handleBar( m_newLine );
2850            break;
2851
2852          case '~':
2853            el = handleTilde();
2854            break;
2855
2856          case '%':
2857            el = handleDiv( m_newLine );
2858            break;
2859
2860          case '/':
2861            el = handleSlash( m_newLine );
2862            break;
2863
2864          default:
2865            break;
2866        }
2867
2868        return el != null ? ELEMENT : CHARACTER;
2869    }
2870
2871    private void closeHeadings()
2872    {
2873        if( m_lastHeading != null && !m_wysiwygEditorMode )
2874        {
2875            // Add the hash anchor element at the end of the heading
2876            addElement( new Element("a").setAttribute( "class","hashlink" ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2877            m_lastHeading = null;
2878        }
2879        popElement("h2");
2880        popElement("h3");
2881        popElement("h4");
2882    }
2883
2884    /**
2885     *  Parses the entire document from the Reader given in the constructor or
2886     *  set by {@link #setInputReader(Reader)}.
2887     *
2888     *  @return A WikiDocument, ready to be passed to the renderer.
2889     *  @throws IOException If parsing cannot be accomplished.
2890     */
2891    public WikiDocument parse()
2892        throws IOException
2893    {
2894        WikiDocument d = new WikiDocument( m_context.getPage() );
2895        d.setContext( m_context );
2896
2897        Element rootElement = new Element("domroot");
2898
2899        d.setRootElement( rootElement );
2900
2901        fillBuffer( rootElement );
2902
2903        paragraphify(rootElement);
2904
2905        return d;
2906    }
2907
2908    /**
2909     *  Checks out that the first paragraph is correctly installed.
2910     *
2911     *  @param rootElement
2912     */
2913    private void paragraphify(Element rootElement)
2914    {
2915        //
2916        //  Add the paragraph tag to the first paragraph
2917        //
2918        List< Content > kids = rootElement.getContent();
2919
2920        if( rootElement.getChild("p") != null )
2921        {
2922            ArrayList<Content> ls = new ArrayList<Content>();
2923            int idxOfFirstContent = 0;
2924            int count = 0;
2925
2926            for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ )
2927            {
2928                Content c = i.next();
2929                if( c instanceof Element )
2930                {
2931                    String name = ( ( Element )c ).getName();
2932                    if( isBlockLevel( name ) ) break;
2933                }
2934
2935                if( !(c instanceof ProcessingInstruction) )
2936                {
2937                    ls.add( c );
2938                    if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2939                }
2940            }
2941
2942            //
2943            //  If there were any elements, then add a new <p> (unless it would
2944            //  be an empty one)
2945            //
2946            if( ls.size() > 0 )
2947            {
2948                Element newel = new Element("p");
2949
2950                for( Iterator< Content > i = ls.iterator(); i.hasNext(); )
2951                {
2952                    Content c = i.next();
2953
2954                    c.detach();
2955                    newel.addContent(c);
2956                }
2957
2958                //
2959                // Make sure there are no empty <p/> tags added.
2960                //
2961                if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2962                    rootElement.addContent(idxOfFirstContent, newel);
2963            }
2964        }
2965    }
2966
2967
2968    /**
2969     *  Compares two Strings, and if one starts with the other, then
2970     *  returns null.  Otherwise just like the normal Comparator
2971     *  for strings.
2972     *
2973     *  @since
2974     */
2975    private static class StartingComparator implements Comparator<String>
2976    {
2977        public int compare( String s1, String s2 )
2978        {
2979            if( s1.length() > s2.length() )
2980            {
2981                if( s1.startsWith(s2) && s2.length() > 1 ) return 0;
2982            }
2983            else
2984            {
2985                if( s2.startsWith(s1) && s1.length() > 1 ) return 0;
2986            }
2987
2988            return s1.compareTo( s2 );
2989        }
2990
2991    }
2992
2993
2994}
2995