001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import org.apache.commons.lang3.StringUtils;
022import org.apache.commons.text.StringEscapeUtils;
023import org.apache.log4j.Logger;
024import org.apache.oro.text.regex.MalformedPatternException;
025import org.apache.oro.text.regex.MatchResult;
026import org.apache.oro.text.regex.Pattern;
027import org.apache.oro.text.regex.PatternCompiler;
028import org.apache.oro.text.regex.PatternMatcher;
029import org.apache.oro.text.regex.Perl5Compiler;
030import org.apache.oro.text.regex.Perl5Matcher;
031import org.apache.wiki.InternalWikiException;
032import org.apache.wiki.StringTransmutator;
033import org.apache.wiki.WikiContext;
034import org.apache.wiki.WikiPage;
035import org.apache.wiki.api.exceptions.PluginException;
036import org.apache.wiki.api.plugin.WikiPlugin;
037import org.apache.wiki.auth.WikiSecurityException;
038import org.apache.wiki.auth.acl.Acl;
039import org.apache.wiki.i18n.InternationalizationManager;
040import org.apache.wiki.preferences.Preferences;
041import org.apache.wiki.render.CleanTextRenderer;
042import org.apache.wiki.render.RenderingManager;
043import org.apache.wiki.util.TextUtil;
044import org.jdom2.Attribute;
045import org.jdom2.Content;
046import org.jdom2.Element;
047import org.jdom2.IllegalDataException;
048import org.jdom2.ProcessingInstruction;
049import org.jdom2.Verifier;
050
051import javax.xml.transform.Result;
052import java.io.IOException;
053import java.io.Reader;
054import java.io.StringReader;
055import java.text.MessageFormat;
056import java.util.ArrayList;
057import java.util.Arrays;
058import java.util.Collection;
059import java.util.EmptyStackException;
060import java.util.HashMap;
061import java.util.Iterator;
062import java.util.List;
063import java.util.Map;
064import java.util.Properties;
065import java.util.ResourceBundle;
066import java.util.Stack;
067
068/**
069 *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
070 *  heart and soul of JSPWiki : make sure you test properly anything that is added,
071 *  or else it breaks down horribly.
072 *
073 *  @since  2.4
074 */
075public class JSPWikiMarkupParser extends MarkupParser {
076
077    protected static final int              READ          = 0;
078    protected static final int              EDIT          = 1;
079    protected static final int              EMPTY         = 2;  // Empty message
080    protected static final int              LOCAL         = 3;
081    protected static final int              LOCALREF      = 4;
082    protected static final int              IMAGE         = 5;
083    protected static final int              EXTERNAL      = 6;
084    protected static final int              INTERWIKI     = 7;
085    protected static final int              IMAGELINK     = 8;
086    protected static final int              IMAGEWIKILINK = 9;
087    protected static final int              ATTACHMENT    = 10;
088
089    private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );
090
091    private boolean        m_isbold       = false;
092    private boolean        m_isitalic     = false;
093    private boolean        m_istable      = false;
094    private boolean        m_isPre        = false;
095    private boolean        m_isEscaping   = false;
096    private boolean        m_isdefinition = false;
097    private boolean        m_isPreBlock   = false;
098
099    /** Contains style information, in multiple forms. */
100    private Stack<Boolean> m_styleStack   = new Stack<>();
101
102     // general list handling
103    private int            m_genlistlevel = 0;
104    private StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
105    private boolean        m_allowPHPWikiStyleLists = true;
106
107    private boolean        m_isOpenParagraph = false;
108
109    /** Parser for extended link functionality. */
110    private LinkParser     m_linkParser = new LinkParser();
111
112    /** Keeps track of any plain text that gets put in the Text nodes */
113    private StringBuilder  m_plainTextBuf = new StringBuilder(20);
114
115    private Element        m_currentElement;
116
117    /** Keep track of duplicate header names.  */
118    private Map<String, Integer>   m_titleSectionCounter = new HashMap<>();
119
120    /** If true, consider CamelCase hyperlinks as well. */
121    public static final String     PROP_CAMELCASELINKS   = "jspwiki.translatorReader.camelCaseLinks";
122
123    /** If true, all hyperlinks are translated as well, regardless whether they
124        are surrounded by brackets. */
125    public static final String     PROP_PLAINURIS        = "jspwiki.translatorReader.plainUris";
126
127    /** If true, all outward attachment info links have a small link image appended. */
128    public static final String     PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";
129
130    /** If true, then considers CamelCase links as well. */
131    private boolean                m_camelCaseLinks      = false;
132
133    /** If true, then generate special output for wysiwyg editing in certain cases */
134    private boolean                m_wysiwygEditorMode     = false;
135
136    /** If true, consider URIs that have no brackets as well. */
137    // FIXME: Currently reserved, but not used.
138    private boolean                m_plainUris           = false;
139
140    /** If true, all outward links use a small link image. */
141    private boolean                m_useOutlinkImage     = true;
142
143    private boolean                m_useAttachmentImage  = true;
144
145    /** If true, allows raw HTML. */
146    private boolean                m_allowHTML           = false;
147
148    private boolean                m_useRelNofollow      = false;
149
150    private PatternCompiler        m_compiler = new Perl5Compiler();
151
152    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";
153
154    private PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();
155    private Pattern                m_camelCasePattern;
156
157    private int                    m_rowNum              = 1;
158
159    private Heading                m_lastHeading         = null;
160
161    private static final String CAMELCASE_PATTERN     = "JSPWikiMarkupParser.camelCasePattern";
162
163    /**
164     *  Creates a markup parser.
165     *
166     *  @param context The WikiContext which controls the parsing
167     *  @param in Where the data is read from.
168     */
169    public JSPWikiMarkupParser( WikiContext context, Reader in )
170    {
171        super( context, in );
172        initialize();
173    }
174
175    // FIXME: parsers should be pooled for better performance.
176    private void initialize()
177    {
178        initInlineImagePatterns();
179
180        m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN );
181        if( m_camelCasePattern == null )
182        {
183            try
184            {
185                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,
186                                                         Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
187            }
188            catch( MalformedPatternException e )
189            {
190                log.fatal("Internal error: Someone put in a faulty pattern.",e);
191                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
192            }
193            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
194        }
195        //
196        //  Set the properties.
197        //
198        Properties props      = m_engine.getWikiProperties();
199
200        String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
201
202        if( cclinks != null )
203        {
204            m_camelCaseLinks = TextUtil.isPositive( cclinks );
205        }
206        else
207        {
208            m_camelCaseLinks  = TextUtil.getBooleanProperty( props,
209                                                             PROP_CAMELCASELINKS,
210                                                             m_camelCaseLinks );
211        }
212
213        Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE );
214        if( wysiwygVariable != null )
215        {
216            m_wysiwygEditorMode = wysiwygVariable.booleanValue();
217        }
218
219        m_plainUris           = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
220        m_useOutlinkImage     = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
221        m_useAttachmentImage  = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
222        m_allowHTML           = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
223        m_useRelNofollow      = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
224
225        if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null )
226        {
227            disableAccessRules();
228        }
229
230        m_context.getPage().setHasMetadata();
231    }
232
233    /**
234     *  Calls a transmutator chain.
235     *
236     *  @param list Chain to call
237     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
238     *  @return The result of the mutation.
239     */
240    protected String callMutatorChain( Collection< StringTransmutator > list, String text )
241    {
242        if( list == null || list.size() == 0 )
243        {
244            return text;
245        }
246
247        for( Iterator< StringTransmutator > i = list.iterator(); i.hasNext(); )
248        {
249            StringTransmutator m = i.next();
250
251            text = m.mutate( m_context, text );
252        }
253
254        return text;
255    }
256
257    /**
258     * Calls the heading listeners.
259     *
260     * @param param A Heading object.
261     */
262    protected void callHeadingListenerChain( Heading param )
263    {
264        List< HeadingListener > list = m_headingListenerChain;
265
266        for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); )
267        {
268            HeadingListener h = i.next();
269
270            h.headingAdded( m_context, param );
271        }
272    }
273
274    /**
275     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
276     *  if you really know what you are doing.
277     *
278     *  @param type One of the types above
279     *  @param link URL to which to link to
280     *  @param text Link text
281     *  @param section If a particular section identifier is required.
282     *  @return An A element.
283     *  @since 2.4.78
284     */
285    protected Element createAnchor(int type, String link, String text, String section)
286    {
287        text = escapeHTMLEntities( text );
288        section = escapeHTMLEntities( section );
289        Element el = new Element("a");
290        el.setAttribute("class",CLASS_TYPES[type]);
291        el.setAttribute("href",link+section);
292        el.addContent(text);
293        return el;
294    }
295
296    private Element makeLink( int type, String link, String text, String section, Iterator< Attribute > attributes )
297    {
298        Element el = null;
299
300        if( text == null ) text = link;
301
302        text = callMutatorChain( m_linkMutators, text );
303
304        section = (section != null) ? ("#"+section) : "";
305
306        // Make sure we make a link name that can be accepted
307        // as a valid URL.
308
309        if( link.length() == 0 )
310        {
311            type = EMPTY;
312        }
313        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
314
315        switch(type)
316        {
317            case READ:
318                el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
319                break;
320
321            case EDIT:
322                el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
323                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
324
325                break;
326
327            case EMPTY:
328                el = new Element("u").addContent(text);
329                break;
330
331                //
332                //  These two are for local references - footnotes and
333                //  references to footnotes.
334                //  We embed the page name (or whatever WikiContext gives us)
335                //  to make sure the links are unique across Wiki.
336                //
337            case LOCALREF:
338                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
339                break;
340
341            case LOCAL:
342                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
343                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
344                el.addContent("["+text+"]");
345                break;
346
347                //
348                //  With the image, external and interwiki types we need to
349                //  make sure nobody can put in Javascript or something else
350                //  annoying into the links themselves.  We do this by preventing
351                //  a haxor from stopping the link name short with quotes in
352                //  fillBuffer().
353                //
354            case IMAGE:
355                el = new Element("img").setAttribute("class","inline");
356                el.setAttribute("src",link);
357                el.setAttribute("alt",text);
358                break;
359
360            case IMAGELINK:
361                el = new Element("img").setAttribute("class","inline");
362                el.setAttribute("src",link);
363                el.setAttribute("alt",text);
364                el = createAnchor(IMAGELINK,text,"","").addContent(el);
365                break;
366
367            case IMAGEWIKILINK:
368                String pagelink = m_context.getURL(WikiContext.VIEW,text);
369                el = new Element("img").setAttribute("class","inline");
370                el.setAttribute("src",link);
371                el.setAttribute("alt",text);
372                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
373                break;
374
375            case EXTERNAL:
376                el = createAnchor( EXTERNAL, link, text, section );
377                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
378                break;
379
380            case INTERWIKI:
381                el = createAnchor( INTERWIKI, link, text, section );
382                break;
383
384            case ATTACHMENT:
385                String attlink = m_context.getURL( WikiContext.ATTACH,
386                                                   link );
387
388                String infolink = m_context.getURL( WikiContext.INFO,
389                                                    link );
390
391                String imglink = m_context.getURL( WikiContext.NONE,
392                                                   "images/attachment_small.png" );
393
394                el = createAnchor( ATTACHMENT, attlink, text, "" );
395
396                if(  m_engine.getAttachmentManager().forceDownload( attlink ) )
397                {
398                    el.setAttribute("download", "");
399                }
400
401                pushElement(el);
402                popElement(el.getName());
403
404                if( m_useAttachmentImage )
405                {
406                    el = new Element("img").setAttribute("src",imglink);
407                    el.setAttribute("border","0");
408                    el.setAttribute("alt","(info)");
409
410                    el = new Element("a").setAttribute("href",infolink).addContent(el);
411                    el.setAttribute("class","infolink");
412                }
413                else
414                {
415                    el = null;
416                }
417                break;
418
419            default:
420                break;
421        }
422
423        if( el != null && attributes != null )
424        {
425            while( attributes.hasNext() )
426            {
427                Attribute attr = attributes.next();
428                if( attr != null )
429                {
430                    el.setAttribute(attr);
431                }
432            }
433        }
434
435        if( el != null )
436        {
437            flushPlainText();
438            m_currentElement.addContent( el );
439        }
440        return el;
441    }
442
443    /**
444     *  These are all of the HTML 4.01 block-level elements.
445     */
446    private static final String[] BLOCK_ELEMENTS = {
447        "address", "blockquote", "div", "dl", "fieldset", "form",
448        "h1", "h2", "h3", "h4", "h5", "h6",
449        "hr", "noscript", "ol", "p", "pre", "table", "ul"
450    };
451
452    private static boolean isBlockLevel( String name )
453    {
454        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
455    }
456
457    /**
458     *  This method peeks ahead in the stream until EOL and returns the result.
459     *  It will keep the buffers untouched.
460     *
461     *  @return The string from the current position to the end of line.
462     */
463
464    // FIXME: Always returns an empty line, even if the stream is full.
465    private String peekAheadLine()
466        throws IOException
467    {
468        String s = readUntilEOL().toString();
469
470        if( s.length() > PUSHBACK_BUFFER_SIZE )
471        {
472            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
473            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
474        }
475        else
476        {
477            try
478            {
479                pushBack( s );
480            }
481            catch( IOException e )
482            {
483                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
484            }
485        }
486        return s;
487    }
488
489    private int flushPlainText()
490    {
491        int numChars = m_plainTextBuf.length();
492
493        if( numChars > 0 )
494        {
495            String buf;
496
497            if( !m_allowHTML )
498            {
499                buf = escapeHTMLEntities(m_plainTextBuf.toString());
500            }
501            else
502            {
503                buf = m_plainTextBuf.toString();
504            }
505            //
506            //  We must first empty the buffer because the side effect of
507            //  calling makeCamelCaseLink() is to call this routine.
508            //
509
510            m_plainTextBuf = new StringBuilder(20);
511
512            try
513            {
514                //
515                //  This is the heaviest part of parsing, and therefore we can
516                //  do some optimization here.
517                //
518                //  1) Only when the length of the buffer is big enough, we try to do the match
519                //
520
521                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
522                {
523                    // System.out.println("Buffer="+buf);
524
525                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
526                    {
527                        MatchResult result = m_camelCaseMatcher.getMatch();
528
529                        String firstPart = buf.substring(0,result.beginOffset(0));
530                        String prefix = result.group(1);
531
532                        if( prefix == null ) prefix = "";
533
534                        String camelCase = result.group(2);
535                        String protocol  = result.group(3);
536                        String uri       = protocol+result.group(4);
537                        buf              = buf.substring(result.endOffset(0));
538
539                        m_currentElement.addContent( firstPart );
540
541                        //
542                        //  Check if the user does not wish to do URL or WikiWord expansion
543                        //
544                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
545                        {
546                            if( prefix.endsWith("~") )
547                            {
548                                if( m_wysiwygEditorMode )
549                                {
550                                    m_currentElement.addContent( "~" );
551                                }
552                                prefix = prefix.substring(0,prefix.length()-1);
553                            }
554                            if( camelCase != null )
555                            {
556                                m_currentElement.addContent( prefix+camelCase );
557                            }
558                            else if( protocol != null )
559                            {
560                                m_currentElement.addContent( prefix+uri );
561                            }
562                            continue;
563                        }
564
565                        //
566                        //  Fine, then let's check what kind of a link this was
567                        //  and emit the proper elements
568                        //
569                        if( protocol != null )
570                        {
571                            char c = uri.charAt(uri.length()-1);
572                            if( c == '.' || c == ',' )
573                            {
574                                uri = uri.substring(0,uri.length()-1);
575                                buf = c + buf;
576                            }
577                            // System.out.println("URI match "+uri);
578                            m_currentElement.addContent( prefix );
579                            makeDirectURILink( uri );
580                        }
581                        else
582                        {
583                            // System.out.println("Matched: '"+camelCase+"'");
584                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
585                            // System.out.println("prefix="+prefix);
586                            m_currentElement.addContent( prefix );
587
588                            makeCamelCaseLink( camelCase );
589                        }
590                    }
591
592                    m_currentElement.addContent( buf );
593                }
594                else
595                {
596                    //
597                    //  No camelcase asked for, just add the elements
598                    //
599                    m_currentElement.addContent( buf );
600                }
601            }
602            catch( IllegalDataException e )
603            {
604                //
605                // Sometimes it's possible that illegal XML chars is added to the data.
606                // Here we make sure it does not stop parsing.
607                //
608                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
609            }
610        }
611
612        return numChars;
613    }
614
615    /**
616     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
617     *  entities that are already escaped).
618     *
619     *  @param buf
620     *  @return An escaped string.
621     */
622    private String escapeHTMLEntities(String buf)
623    {
624        StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
625
626        for( int i = 0; i < buf.length(); i++ )
627        {
628            char ch = buf.charAt(i);
629
630            if( ch == '<' )
631            {
632                tmpBuf.append("&lt;");
633            }
634            else if( ch == '>' )
635            {
636                tmpBuf.append("&gt;");
637            }
638            else if( ch == '\"' )
639            {
640                tmpBuf.append("&quot;");
641            }
642            else if( ch == '&' )
643            {
644                //
645                //  If the following is an XML entity reference (&#.*;) we'll
646                //  leave it as it is; otherwise we'll replace it with an &amp;
647                //
648
649                boolean isEntity = false;
650                StringBuilder entityBuf = new StringBuilder();
651
652                if( i < buf.length() -1 )
653                {
654                    for( int j = i; j < buf.length(); j++ )
655                    {
656                        char ch2 = buf.charAt(j);
657
658                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
659                        {
660                            entityBuf.append(ch2);
661
662                            if( ch2 == ';' )
663                            {
664                                isEntity = true;
665                                break;
666                            }
667                        }
668                        else
669                        {
670                            break;
671                        }
672                    }
673                }
674
675                if( isEntity )
676                {
677                    tmpBuf.append( entityBuf );
678                    i = i + entityBuf.length() - 1;
679                }
680                else
681                {
682                    tmpBuf.append("&amp;");
683                }
684
685            }
686            else
687            {
688                tmpBuf.append( ch );
689            }
690        }
691
692        return tmpBuf.toString();
693    }
694
695    private Element pushElement( Element e )
696    {
697        flushPlainText();
698        m_currentElement.addContent( e );
699        m_currentElement = e;
700
701        return e;
702    }
703
704    private Element addElement( Content e )
705    {
706        if( e != null )
707        {
708            flushPlainText();
709            m_currentElement.addContent( e );
710        }
711        return m_currentElement;
712    }
713
714    /**
715     *  All elements that can be empty by the HTML DTD.
716     */
717    //  Keep sorted.
718    private static final String[] EMPTY_ELEMENTS = {
719        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
720    };
721
722    /**
723     *  Goes through the current element stack and pops all elements until this
724     *  element is found - this essentially "closes" and element.
725     *
726     *  @param s
727     *  @return The new current element, or null, if there was no such element in the entire stack.
728     */
729    private Element popElement( String s )
730    {
731        int flushedBytes = flushPlainText();
732
733        Element currEl = m_currentElement;
734
735        while( currEl.getParentElement() != null )
736        {
737            if( currEl.getName().equals(s) && !currEl.isRootElement() )
738            {
739                m_currentElement = currEl.getParentElement();
740
741                //
742                //  Check if it's okay for this element to be empty.  Then we will
743                //  trick the JDOM generator into not generating an empty element,
744                //  by putting an empty string between the tags.  Yes, it's a kludge
745                //  but what'cha gonna do about it. :-)
746                //
747
748                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
749                {
750                    currEl.addContent("");
751                }
752
753                return m_currentElement;
754            }
755
756            currEl = currEl.getParentElement();
757        }
758
759        return null;
760    }
761
762
763    /**
764     *  Reads the stream until it meets one of the specified
765     *  ending characters, or stream end.  The ending character will be left
766     *  in the stream.
767     */
768    private String readUntil( String endChars )
769        throws IOException
770    {
771        StringBuilder sb = new StringBuilder( 80 );
772        int ch = nextToken();
773
774        while( ch != -1 )
775        {
776            if( ch == '\\' )
777            {
778                ch = nextToken();
779                if( ch == -1 )
780                {
781                    break;
782                }
783            }
784            else
785            {
786                if( endChars.indexOf((char)ch) != -1 )
787                {
788                    pushBack( ch );
789                    break;
790                }
791            }
792            sb.append( (char) ch );
793            ch = nextToken();
794        }
795
796        return sb.toString();
797    }
798
799    /**
800     *  Reads the stream while the characters that have been specified are
801     *  in the stream, returning then the result as a String.
802     */
803    private String readWhile( String endChars )
804        throws IOException
805    {
806        StringBuilder sb = new StringBuilder( 80 );
807        int ch = nextToken();
808
809        while( ch != -1 )
810        {
811            if( endChars.indexOf((char)ch) == -1 )
812            {
813                pushBack( ch );
814                break;
815            }
816
817            sb.append( (char) ch );
818            ch = nextToken();
819        }
820
821        return sb.toString();
822    }
823
824    private JSPWikiMarkupParser m_cleanTranslator;
825
826    /**
827     *  Does a lazy init.  Otherwise, we would get into a situation
828     *  where HTMLRenderer would try and boot a TranslatorReader before
829     *  the TranslatorReader it is contained by is up.
830     */
831    private JSPWikiMarkupParser getCleanTranslator()
832    {
833        if( m_cleanTranslator == null )
834        {
835            WikiContext dummyContext = new WikiContext( m_engine,
836                                                        m_context.getHttpRequest(),
837                                                        m_context.getPage() );
838            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
839
840            m_cleanTranslator.m_allowHTML = true;
841        }
842
843        return m_cleanTranslator;
844    }
845    /**
846     *  Modifies the "hd" parameter to contain proper values.  Because
847     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
848     *  % after url encoding with '_'.
849     *  <p>
850     *  Counts also duplicate headings (= headings with similar name), and
851     *  attaches a counter.
852     */
853    private String makeHeadingAnchor( String baseName, String title, Heading hd )
854    {
855        hd.m_titleText = title;
856        title = MarkupParser.wikifyLink( title );
857
858        hd.m_titleSection = m_engine.encodeName(title);
859
860        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) )
861        {
862            Integer count = m_titleSectionCounter.get( hd.m_titleSection );
863            count = count + 1;
864            m_titleSectionCounter.put( hd.m_titleSection, count );
865            hd.m_titleSection += "-" + count;
866        }
867        else
868        {
869            m_titleSectionCounter.put( hd.m_titleSection, 1 );
870        }
871
872        hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+
873                           "-"+hd.m_titleSection;
874        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
875        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
876
877        return hd.m_titleAnchor;
878    }
879
880    private String makeSectionTitle( String title )
881    {
882        title = title.trim();
883        String outTitle;
884
885        try
886        {
887            JSPWikiMarkupParser dtr = getCleanTranslator();
888            dtr.setInputReader( new StringReader(title) );
889
890            CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse());
891
892            outTitle = ctt.getString();
893        }
894        catch( IOException e )
895        {
896            log.fatal("CleanTranslator not working", e);
897            throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e);
898        }
899
900        return outTitle;
901    }
902
903    /**
904     *  Returns XHTML for the heading.
905     *
906     *  @param level The level of the heading.  @see Heading
907     *  @param title the title for the heading
908     *  @param hd a List to which heading should be added
909     *  @return An Element containing the heading
910     */
911    public Element makeHeading( int level, String title, Heading hd )
912    {
913        Element el = null;
914
915        String pageName = m_context.getPage().getName();
916
917        String outTitle = makeSectionTitle( title );
918
919        hd.m_level = level;
920
921        switch( level )
922        {
923          case Heading.HEADING_SMALL:
924            el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
925            break;
926
927          case Heading.HEADING_MEDIUM:
928            el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
929            break;
930
931          case Heading.HEADING_LARGE:
932            el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
933            break;
934
935          default:
936            throw new InternalWikiException("Illegal heading type "+level);
937        }
938
939
940        return el;
941    }
942
943    /**
944     *  When given a link to a WikiName, we just return
945     *  a proper HTML link for it.  The local link mutator
946     *  chain is also called.
947     */
948    private Element makeCamelCaseLink( String wikiname )
949    {
950        String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
951
952        callMutatorChain( m_localLinkMutatorChain, wikiname );
953
954        if( matchedLink != null ) {
955            makeLink( READ, matchedLink, wikiname, null, null );
956        } else {
957            makeLink( EDIT, wikiname, wikiname, null, null );
958        }
959
960        return m_currentElement;
961    }
962
963    /** Holds the image URL for the duration of this parser */
964    private String m_outlinkImageURL = null;
965
966    /**
967     *  Returns an element for the external link image (out.png).  However,
968     *  this method caches the URL for the lifetime of this MarkupParser,
969     *  because it's commonly used, and we'll end up with possibly hundreds
970     *  our thousands of references to it...  It's a lot faster, too.
971     *
972     *  @return  An element containing the HTML for the outlink image.
973     */
974    private Element outlinkImage()
975    {
976        Element el = null;
977
978        if( m_useOutlinkImage )
979        {
980            if( m_outlinkImageURL == null )
981            {
982                m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE );
983            }
984
985            el = new Element( "img" ).setAttribute( "class", OUTLINK );
986            el.setAttribute( "src", m_outlinkImageURL );
987            el.setAttribute( "alt","" );
988        }
989
990        return el;
991    }
992
993    /**
994     *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
995     *  because of the way that flushPlainText() works, it already encodes
996     *  all of the XML entities.  But so does WikiContext.getURL(), so we
997     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
998     *  <p>
999     *  What a crappy problem.
1000     *
1001     * @param url
1002     * @return An anchor Element containing the link.
1003     */
1004    private Element makeDirectURILink( String url )
1005    {
1006        Element result;
1007        String last = null;
1008
1009        if( url.endsWith(",") || url.endsWith(".") )
1010        {
1011            last = url.substring( url.length()-1 );
1012            url  = url.substring( 0, url.length()-1 );
1013        }
1014
1015        callMutatorChain( m_externalLinkMutatorChain, url );
1016
1017        if( m_linkParsingOperations.isImageLink( url ) )
1018        {
1019            result = handleImageLink( StringUtils.replace(url,"&amp;","&"), url, false );
1020        }
1021        else
1022        {
1023            result = makeLink( EXTERNAL, StringUtils.replace(url,"&amp;","&"), url, null, null );
1024            addElement( outlinkImage() );
1025        }
1026
1027        if( last != null )
1028        {
1029            m_plainTextBuf.append(last);
1030        }
1031
1032        return result;
1033    }
1034
1035    /**
1036     *  Image links are handled differently:
1037     *  1. If the text is a WikiName of an existing page,
1038     *     it gets linked.
1039     *  2. If the text is an external link, then it is inlined.
1040     *  3. Otherwise it becomes an ALT text.
1041     *
1042     *  @param reallink The link to the image.
1043     *  @param link     Link text portion, may be a link to somewhere else.
1044     *  @param hasLinkText If true, then the defined link had a link text available.
1045     *                  This means that the link text may be a link to a wiki page,
1046     *                  or an external resource.
1047     */
1048
1049    // FIXME: isExternalLink() is called twice.
1050    private Element handleImageLink( String reallink, String link, boolean hasLinkText )
1051    {
1052        String possiblePage = MarkupParser.cleanLink( link );
1053
1054        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText )
1055        {
1056            return makeLink( IMAGELINK, reallink, link, null, null );
1057        }
1058        else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText )
1059        {
1060            // System.out.println("Orig="+link+", Matched: "+matchedLink);
1061            callMutatorChain( m_localLinkMutatorChain, possiblePage );
1062
1063            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
1064        }
1065        else
1066        {
1067            return makeLink( IMAGE, reallink, link, null, null );
1068        }
1069    }
1070
1071    private Element handleAccessRule( String ruleLine ) {
1072        if( m_wysiwygEditorMode ) {
1073            m_currentElement.addContent( "[" + ruleLine + "]" );
1074        }
1075
1076        if( !m_parseAccessRules ) {
1077            return m_currentElement;
1078        }
1079        final WikiPage page = m_context.getRealPage();
1080        // UserDatabase db = m_context.getEngine().getUserDatabase();
1081
1082        if( ruleLine.startsWith( "{" ) ) {
1083            ruleLine = ruleLine.substring( 1 );
1084        }
1085
1086        if( ruleLine.endsWith( "}" ) ) {
1087            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1088        }
1089
1090        if( log.isDebugEnabled() ) {
1091            log.debug("page="+page.getName()+", ACL = "+ruleLine);
1092        }
1093
1094        try {
1095            final Acl acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1096            page.setAcl( acl );
1097
1098            if( log.isDebugEnabled() ) {
1099                log.debug( acl.toString() );
1100            }
1101        } catch( final WikiSecurityException wse ) {
1102            return makeError( wse.getMessage() );
1103        }
1104
1105        return m_currentElement;
1106    }
1107
1108    /**
1109     *  Handles metadata setting [{SET foo=bar}]
1110     */
1111    private Element handleMetadata( final String link ) {
1112        if( m_wysiwygEditorMode ) {
1113            m_currentElement.addContent( "[" + link + "]" );
1114        }
1115
1116        try {
1117            final String args = link.substring( link.indexOf(' '), link.length()-1 );
1118            final String name = args.substring( 0, args.indexOf('=') ).trim();
1119            String val  = args.substring( args.indexOf('=')+1 ).trim();
1120
1121            if( val.startsWith("'") ) {
1122                val = val.substring( 1 );
1123            }
1124            if( val.endsWith("'") ) {
1125                val = val.substring( 0, val.length()-1 );
1126            }
1127
1128            // log.debug("SET name='"+name+"', value='"+val+"'.");
1129
1130            if( name.length() > 0 && val.length() > 0 ) {
1131                val = m_engine.getVariableManager().expandVariables( m_context, val );
1132                m_context.getPage().setAttribute( name, val );
1133            }
1134        } catch( final Exception e ) {
1135            final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1136            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1137        }
1138
1139        return m_currentElement;
1140    }
1141
1142    /**
1143     *  Emits a processing instruction that will disable markup escaping. This is
1144     *  very useful if you want to emit HTML directly into the stream.
1145     *
1146     */
1147    private void disableOutputEscaping()
1148    {
1149        addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") );
1150    }
1151
1152    /**
1153     *  Gobbles up all hyperlinks that are encased in square brackets.
1154     */
1155    private Element handleHyperlinks( String linktext, int pos )
1156    {
1157        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1158
1159        StringBuilder sb = new StringBuilder(linktext.length()+80);
1160
1161        if( m_linkParsingOperations.isAccessRule( linktext ) )
1162        {
1163            return handleAccessRule( linktext );
1164        }
1165
1166        if( m_linkParsingOperations.isMetadata( linktext ) )
1167        {
1168            return handleMetadata( linktext );
1169        }
1170
1171        if( m_linkParsingOperations.isPluginLink( linktext ) )
1172        {
1173            try
1174            {
1175                PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1176                //
1177                //  This might sometimes fail, especially if there is something which looks
1178                //  like a plugin invocation but is really not.
1179                //
1180                if( pluginContent != null )
1181                {
1182                    addElement( pluginContent );
1183
1184                    pluginContent.executeParse( m_context );
1185                }
1186            }
1187            catch( PluginException e )
1188            {
1189                log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
1190                //log.info( "Root cause:",e.getRootThrowable() );
1191                if( !m_wysiwygEditorMode )
1192                {
1193                    ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE );
1194                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
1195                                                                        m_context.getRealPage().getWiki(),
1196                                                                        m_context.getRealPage().getName(),
1197                                                                        e.getMessage() ) ) );
1198                }
1199            }
1200
1201            return m_currentElement;
1202        }
1203
1204        try
1205        {
1206            LinkParser.Link link = m_linkParser.parse(linktext);
1207            linktext       = link.getText();
1208            String linkref = link.getReference();
1209
1210            //
1211            //  Yes, we now have the components separated.
1212            //  linktext = the text the link should have
1213            //  linkref  = the url or page name.
1214            //
1215            //  In many cases these are the same.  [linktext|linkref].
1216            //
1217            if( m_linkParsingOperations.isVariableLink( linktext ) )
1218            {
1219                Content el = new VariableContent(linktext);
1220
1221                addElement( el );
1222            }
1223            else if( m_linkParsingOperations.isExternalLink( linkref ) )
1224            {
1225                // It's an external link, out of this Wiki
1226
1227                callMutatorChain( m_externalLinkMutatorChain, linkref );
1228
1229                if( m_linkParsingOperations.isImageLink( linkref ) )
1230                {
1231                    handleImageLink( linkref, linktext, link.hasReference() );
1232                }
1233                else
1234                {
1235                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1236                    addElement( outlinkImage() );
1237                }
1238            }
1239            else if( link.isInterwikiLink() )
1240            {
1241                // It's an interwiki link
1242                // InterWiki links also get added to external link chain
1243                // after the links have been resolved.
1244
1245                // FIXME: There is an interesting issue here:  We probably should
1246                //        URLEncode the wikiPage, but we can't since some of the
1247                //        Wikis use slashes (/), which won't survive URLEncoding.
1248                //        Besides, we don't know which character set the other Wiki
1249                //        is using, so you'll have to write the entire name as it appears
1250                //        in the URL.  Bugger.
1251
1252                String extWiki  = link.getExternalWiki();
1253                String wikiPage = link.getExternalWikiPage();
1254
1255                if( m_wysiwygEditorMode )
1256                {
1257                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1258                }
1259                else
1260                {
1261                    String urlReference = m_engine.getInterWikiURL( extWiki );
1262
1263                    if( urlReference != null )
1264                    {
1265                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1266                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1267
1268                        if( m_linkParsingOperations.isImageLink(urlReference) )
1269                        {
1270                            handleImageLink( urlReference, linktext, link.hasReference() );
1271                        }
1272                        else
1273                        {
1274                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1275                        }
1276
1277                        if( m_linkParsingOperations.isExternalLink(urlReference) )
1278                        {
1279                            addElement( outlinkImage() );
1280                        }
1281                    }
1282                    else
1283                    {
1284                        Object[] args = { escapeHTMLEntities(extWiki) };
1285
1286                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1287                    }
1288                }
1289            }
1290            else if( linkref.startsWith("#") )
1291            {
1292                // It defines a local footnote
1293                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1294            }
1295            else if( TextUtil.isNumber( linkref ) )
1296            {
1297                // It defines a reference to a local footnote
1298                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1299            }
1300            else
1301            {
1302                int hashMark = -1;
1303
1304                //
1305                //  Internal wiki link, but is it an attachment link?
1306                //
1307                String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref );
1308                if( attachment != null )
1309                {
1310                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1311
1312                    if( m_linkParsingOperations.isImageLink( linkref ) )
1313                    {
1314                        attachment = m_context.getURL( WikiContext.ATTACH, attachment );
1315                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1316                    }
1317                    else
1318                    {
1319                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1320                    }
1321                }
1322                else if( (hashMark = linkref.indexOf('#')) != -1 )
1323                {
1324                    // It's an internal Wiki link, but to a named section
1325
1326                    String namedSection = linkref.substring( hashMark+1 );
1327                    linkref = linkref.substring( 0, hashMark );
1328
1329                    linkref = MarkupParser.cleanLink( linkref );
1330
1331                    callMutatorChain( m_localLinkMutatorChain, linkref );
1332
1333                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1334                    if( matchedLink != null ) {
1335                        String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection));
1336                        sectref = sectref.replace('%', '_');
1337                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1338                    } else {
1339                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1340                    }
1341                }
1342                else
1343                {
1344                    // It's an internal Wiki link
1345                    linkref = MarkupParser.cleanLink( linkref );
1346
1347                    callMutatorChain( m_localLinkMutatorChain, linkref );
1348
1349                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1350                    if( matchedLink != null ) {
1351                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1352                    } else {
1353                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1354                    }
1355                }
1356            }
1357        }
1358        catch( ParseException e )
1359        {
1360            log.info("Parser failure: ",e);
1361            Object[] args = { e.getMessage() };
1362            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1363        }
1364
1365        return m_currentElement;
1366    }
1367
1368    /**
1369     *  Pushes back any string that has been read.  It will obviously
1370     *  be pushed back in a reverse order.
1371     *
1372     *  @since 2.1.77
1373     */
1374    private void pushBack( String s )
1375        throws IOException
1376    {
1377        for( int i = s.length()-1; i >= 0; i-- )
1378        {
1379            pushBack( s.charAt(i) );
1380        }
1381    }
1382
1383    private Element handleBackslash()
1384        throws IOException
1385    {
1386        int ch = nextToken();
1387
1388        if( ch == '\\' )
1389        {
1390            int ch2 = nextToken();
1391
1392            if( ch2 == '\\' )
1393            {
1394                pushElement( new Element("br").setAttribute("clear","all"));
1395                return popElement("br");
1396            }
1397
1398            pushBack( ch2 );
1399
1400            pushElement( new Element("br") );
1401            return popElement("br");
1402        }
1403
1404        pushBack( ch );
1405
1406        return null;
1407    }
1408
1409    private Element handleUnderscore()
1410        throws IOException
1411    {
1412        int ch = nextToken();
1413        Element el = null;
1414
1415        if( ch == '_' )
1416        {
1417            if( m_isbold )
1418            {
1419                el = popElement("b");
1420            }
1421            else
1422            {
1423                el = pushElement( new Element("b") );
1424            }
1425            m_isbold = !m_isbold;
1426        }
1427        else
1428        {
1429            pushBack( ch );
1430        }
1431
1432        return el;
1433    }
1434
1435
1436    /**
1437     *  For example: italics.
1438     */
1439    private Element handleApostrophe()
1440        throws IOException
1441    {
1442        int ch = nextToken();
1443        Element el = null;
1444
1445        if( ch == '\'' )
1446        {
1447            if( m_isitalic )
1448            {
1449                el = popElement("i");
1450            }
1451            else
1452            {
1453                el = pushElement( new Element("i") );
1454            }
1455            m_isitalic = !m_isitalic;
1456        }
1457        else
1458        {
1459            pushBack( ch );
1460        }
1461
1462        return el;
1463    }
1464
1465    private Element handleOpenbrace( boolean isBlock )
1466        throws IOException
1467    {
1468        int ch = nextToken();
1469
1470        if( ch == '{' )
1471        {
1472            int ch2 = nextToken();
1473
1474            if( ch2 == '{' )
1475            {
1476                m_isPre = true;
1477                m_isEscaping = true;
1478                m_isPreBlock = isBlock;
1479
1480                if( isBlock )
1481                {
1482                    startBlockLevel();
1483                    return pushElement( new Element("pre") );
1484                }
1485
1486                return pushElement( new Element("span").setAttribute("class","inline-code") );
1487            }
1488
1489            pushBack( ch2 );
1490
1491            return pushElement( new Element("tt") );
1492        }
1493
1494        pushBack( ch );
1495
1496        return null;
1497    }
1498
1499    /**
1500     *  Handles both }} and }}}
1501     */
1502    private Element handleClosebrace()
1503        throws IOException
1504    {
1505        int ch2 = nextToken();
1506
1507        if( ch2 == '}' )
1508        {
1509            int ch3 = nextToken();
1510
1511            if( ch3 == '}' )
1512            {
1513                if( m_isPre )
1514                {
1515                    if( m_isPreBlock )
1516                    {
1517                        popElement( "pre" );
1518                    }
1519                    else
1520                    {
1521                        popElement( "span" );
1522                    }
1523
1524                    m_isPre = false;
1525                    m_isEscaping = false;
1526                    return m_currentElement;
1527                }
1528
1529                m_plainTextBuf.append("}}}");
1530                return m_currentElement;
1531            }
1532
1533            pushBack( ch3 );
1534
1535            if( !m_isEscaping )
1536            {
1537                return popElement("tt");
1538            }
1539        }
1540
1541        pushBack( ch2 );
1542
1543        return null;
1544    }
1545
1546    private Element handleDash()
1547        throws IOException
1548    {
1549        int ch = nextToken();
1550
1551        if( ch == '-' )
1552        {
1553            int ch2 = nextToken();
1554
1555            if( ch2 == '-' )
1556            {
1557                int ch3 = nextToken();
1558
1559                if( ch3 == '-' )
1560                {
1561                    // Empty away all the rest of the dashes.
1562                    // Do not forget to return the first non-match back.
1563                    do
1564                    {
1565                        ch = nextToken();
1566                    }
1567                    while ( ch == '-' );
1568
1569                    pushBack(ch);
1570                    startBlockLevel();
1571                    pushElement( new Element("hr") );
1572                    return popElement( "hr" );
1573                }
1574
1575                pushBack( ch3 );
1576            }
1577            pushBack( ch2 );
1578        }
1579
1580        pushBack( ch );
1581
1582        return null;
1583    }
1584
1585    private Element handleHeading()
1586        throws IOException
1587    {
1588        Element el = null;
1589
1590        int ch  = nextToken();
1591
1592        Heading hd = new Heading();
1593
1594        if( ch == '!' )
1595        {
1596            int ch2 = nextToken();
1597
1598            if( ch2 == '!' )
1599            {
1600                String title = peekAheadLine();
1601
1602                el = makeHeading( Heading.HEADING_LARGE, title, hd);
1603            }
1604            else
1605            {
1606                pushBack( ch2 );
1607                String title = peekAheadLine();
1608                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1609            }
1610        }
1611        else
1612        {
1613            pushBack( ch );
1614            String title = peekAheadLine();
1615            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1616        }
1617
1618        callHeadingListenerChain( hd );
1619
1620        m_lastHeading = hd;
1621
1622        if( el != null ) pushElement(el);
1623
1624        return el;
1625    }
1626
1627    /**
1628     *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1629     *  EOL from the stream.
1630     */
1631    private StringBuilder readUntilEOL()
1632        throws IOException
1633    {
1634        int ch;
1635        StringBuilder buf = new StringBuilder( 256 );
1636
1637        while( true )
1638        {
1639            ch = nextToken();
1640
1641            if( ch == -1 )
1642                break;
1643
1644            buf.append( (char) ch );
1645
1646            if( ch == '\n' )
1647                break;
1648        }
1649        return buf;
1650    }
1651
1652    /** Controls whether italic is restarted after a paragraph shift */
1653
1654    private boolean m_restartitalic = false;
1655    private boolean m_restartbold   = false;
1656
1657    private boolean m_newLine;
1658
1659    /**
1660     *  Starts a block level element, therefore closing
1661     *  a potential open paragraph tag.
1662     */
1663    private void startBlockLevel()
1664    {
1665        // These may not continue over block level limits in XHTML
1666
1667        popElement("i");
1668        popElement("b");
1669        popElement("tt");
1670
1671        if( m_isOpenParagraph )
1672        {
1673            m_isOpenParagraph = false;
1674            popElement("p");
1675            m_plainTextBuf.append("\n"); // Just small beautification
1676        }
1677
1678        m_restartitalic = m_isitalic;
1679        m_restartbold   = m_isbold;
1680
1681        m_isitalic = false;
1682        m_isbold   = false;
1683    }
1684
1685    private static String getListType( char c )
1686    {
1687        if( c == '*' )
1688        {
1689            return "ul";
1690        }
1691        else if( c == '#' )
1692        {
1693            return "ol";
1694        }
1695        throw new InternalWikiException("Parser got faulty list type: "+c);
1696    }
1697    /**
1698     *  Like original handleOrderedList() and handleUnorderedList()
1699     *  however handles both ordered ('#') and unordered ('*') mixed together.
1700     */
1701
1702    // FIXME: Refactor this; it's a bit messy.
1703
1704    private Element handleGeneralList()
1705        throws IOException
1706    {
1707         startBlockLevel();
1708
1709         String strBullets = readWhile( "*#" );
1710         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1711         int numBullets = strBullets.length();
1712
1713         // override the beginning portion of bullet pattern to be like the previous
1714         // to simulate PHPWiki style lists
1715
1716         if(m_allowPHPWikiStyleLists)
1717         {
1718             // only substitute if different
1719             if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1720                   (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
1721             {
1722                 if(numBullets <= m_genlistlevel)
1723                 {
1724                     // Substitute all but the last character (keep the expressed bullet preference)
1725                     strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
1726                                   + strBullets.substring(numBullets-1, numBullets);
1727                 }
1728                 else
1729                 {
1730                     strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
1731                 }
1732             }
1733         }
1734
1735         //
1736         //  Check if this is still of the same type
1737         //
1738         if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1739            (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
1740         {
1741             if( numBullets > m_genlistlevel )
1742             {
1743                 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );
1744
1745                 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
1746                 {
1747                     // bullets are growing, get from new bullet list
1748                     pushElement( new Element("li") );
1749                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
1750                 }
1751             }
1752             else if( numBullets < m_genlistlevel )
1753             {
1754                 //  Close the previous list item.
1755                 // buf.append( m_renderer.closeListItem() );
1756                 popElement( "li" );
1757
1758                 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
1759                 {
1760                     // bullets are shrinking, get from old bullet list
1761
1762                     popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1763                     if( m_genlistlevel > 0 )
1764                     {
1765                         popElement( "li" );
1766                     }
1767
1768                 }
1769             }
1770             else
1771             {
1772                 if( m_genlistlevel > 0 )
1773                 {
1774                     popElement( "li" );
1775                 }
1776             }
1777         }
1778         else
1779         {
1780             //
1781             //  The pattern has changed, unwind and restart
1782             //
1783             int  numEqualBullets;
1784             int  numCheckBullets;
1785
1786             // find out how much is the same
1787             numEqualBullets = 0;
1788             numCheckBullets = Math.min(numBullets,m_genlistlevel);
1789
1790             while( numEqualBullets < numCheckBullets )
1791             {
1792                 // if the bullets are equal so far, keep going
1793                 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
1794                     numEqualBullets++;
1795                 // otherwise giveup, we have found how many are equal
1796                 else
1797                     break;
1798             }
1799
1800             //unwind
1801             for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
1802             {
1803                 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
1804                 if( m_genlistlevel > numBullets )
1805                 {
1806                     popElement("li");
1807                 }
1808             }
1809
1810             //rewind
1811
1812             pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
1813             for(int i = numEqualBullets; i < numBullets; i++)
1814             {
1815                 pushElement( new Element("li") );
1816                 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
1817             }
1818             m_genlistlevel = numBullets;
1819         }
1820
1821         //
1822         //  Push a new list item, and eat away any extra whitespace
1823         //
1824         pushElement( new Element("li") );
1825         readWhile(" ");
1826
1827         // work done, remember the new bullet list (in place of old one)
1828         m_genlistBulletBuffer.setLength(0);
1829         m_genlistBulletBuffer.append(strBullets);
1830
1831         return m_currentElement;
1832    }
1833
1834    private Element unwindGeneralList()
1835    {
1836        //unwind
1837        for( ; m_genlistlevel > 0; m_genlistlevel-- )
1838        {
1839            popElement( "li" );
1840            popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1841        }
1842
1843        m_genlistBulletBuffer.setLength(0);
1844
1845        return null;
1846    }
1847
1848
1849    private Element handleDefinitionList()
1850        throws IOException
1851    {
1852        if( !m_isdefinition )
1853        {
1854            m_isdefinition = true;
1855
1856            startBlockLevel();
1857
1858            pushElement( new Element("dl") );
1859            return pushElement( new Element("dt") );
1860        }
1861
1862        return null;
1863    }
1864
1865    private Element handleOpenbracket()
1866        throws IOException
1867    {
1868        StringBuilder sb = new StringBuilder(40);
1869        int pos = getPosition();
1870        int ch = nextToken();
1871        boolean isPlugin = false;
1872
1873        if( ch == '[' )
1874        {
1875            if( m_wysiwygEditorMode )
1876            {
1877                sb.append( '[' );
1878            }
1879
1880            sb.append( (char)ch );
1881
1882            while( (ch = nextToken()) == '[' )
1883            {
1884                sb.append( (char)ch );
1885            }
1886        }
1887
1888
1889        if( ch == '{' )
1890        {
1891            isPlugin = true;
1892        }
1893
1894        pushBack( ch );
1895
1896        if( sb.length() > 0 )
1897        {
1898            m_plainTextBuf.append( sb );
1899            return m_currentElement;
1900        }
1901
1902        //
1903        //  Find end of hyperlink
1904        //
1905
1906        ch = nextToken();
1907        int nesting = 1;    // Check for nested plugins
1908
1909        while( ch != -1 )
1910        {
1911            int ch2 = nextToken(); pushBack(ch2);
1912
1913            if( isPlugin )
1914            {
1915                if( ch == '[' && ch2 == '{' )
1916                {
1917                    nesting++;
1918                }
1919                else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
1920                {
1921                    break;
1922                }
1923                else if( ch == '}' && ch2 == ']' )
1924                {
1925                    // NB: This will be decremented once at the end
1926                    nesting--;
1927                }
1928            }
1929            else
1930            {
1931                if( ch == ']' )
1932                {
1933                    break;
1934                }
1935            }
1936
1937            sb.append( (char) ch );
1938
1939            ch = nextToken();
1940        }
1941
1942        //
1943        //  If the link is never finished, do some tricks to display the rest of the line
1944        //  unchanged.
1945        //
1946        if( ch == -1 )
1947        {
1948            log.debug("Warning: unterminated link detected!");
1949            m_isEscaping = true;
1950            m_plainTextBuf.append( sb );
1951            flushPlainText();
1952            m_isEscaping = false;
1953            return m_currentElement;
1954        }
1955
1956        return handleHyperlinks( sb.toString(), pos );
1957    }
1958
1959    /**
1960     *  Reads the stream until the current brace is closed or stream end.
1961     */
1962    private String readBraceContent( char opening, char closing )
1963        throws IOException
1964    {
1965        StringBuilder sb = new StringBuilder(40);
1966        int braceLevel = 1;
1967        int ch;
1968        while(( ch = nextToken() ) != -1 )
1969        {
1970            if( ch == '\\' )
1971            {
1972                continue;
1973            }
1974            else if ( ch == opening )
1975            {
1976                braceLevel++;
1977            }
1978            else if ( ch == closing )
1979            {
1980                braceLevel--;
1981                if (braceLevel==0)
1982                {
1983                  break;
1984                }
1985            }
1986            sb.append( (char)ch );
1987        }
1988        return sb.toString();
1989    }
1990
1991
1992    /**
1993     *  Handles constructs of type %%(style) and %%class
1994     * @param newLine
1995     * @return An Element containing the div or span, depending on the situation.
1996     * @throws IOException
1997     */
1998    private Element handleDiv( boolean newLine )
1999        throws IOException
2000    {
2001        int ch = nextToken();
2002        Element el = null;
2003
2004        if( ch == '%' )
2005        {
2006            String style = null;
2007            String clazz = null;
2008
2009            ch = nextToken();
2010
2011            //
2012            //  Style or class?
2013            //
2014            if( ch == '(' )
2015            {
2016                style = readBraceContent('(',')');
2017            }
2018            else if( Character.isLetter( (char) ch ) )
2019            {
2020                pushBack( ch );
2021                clazz = readUntil( "( \t\n\r" );
2022                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
2023                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
2024                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
2025                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
2026
2027                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
2028                //(2) remove any invalid character
2029                if( clazz != null){
2030
2031                    clazz = clazz.replace('.', ' ')
2032                                 .replaceAll("[^\\s-_\\w\\x200-\\x377]+","");
2033
2034                }
2035                ch = nextToken();
2036
2037                //check for %%class1.class2( style information )
2038                if( ch == '(' )
2039                {
2040                    style = readBraceContent('(',')');
2041                }
2042                //
2043                //  Pop out only spaces, so that the upcoming EOL check does not check the
2044                //  next line.
2045                //
2046                else if( ch == '\n' || ch == '\r' )
2047                {
2048                    pushBack(ch);
2049                }
2050            }
2051            else
2052            {
2053                //
2054                // Anything else stops.
2055                //
2056
2057                pushBack(ch);
2058
2059                try
2060                {
2061                    Boolean isSpan = m_styleStack.pop();
2062
2063                    if( isSpan == null )
2064                    {
2065                        // Fail quietly
2066                    }
2067                    else if( isSpan.booleanValue() )
2068                    {
2069                        el = popElement( "span" );
2070                    }
2071                    else
2072                    {
2073                        el = popElement( "div" );
2074                    }
2075                }
2076                catch( EmptyStackException e )
2077                {
2078                    log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
2079                    return m_currentElement;
2080                }
2081
2082                return el;
2083            }
2084
2085            //
2086            //  Check if there is an attempt to do something nasty
2087            //
2088
2089            try
2090            {
2091                style = StringEscapeUtils.unescapeHtml4(style);
2092                if( style != null && style.indexOf("javascript:") != -1 )
2093                {
2094                    log.debug("Attempt to output javascript within CSS:"+style);
2095                    ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2096                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
2097                }
2098            }
2099            catch( NumberFormatException e )
2100            {
2101                //
2102                //  If there are unknown entities, we don't want the parser to stop.
2103                //
2104                ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2105                String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
2106                return addElement( makeError( msg ) );
2107            }
2108
2109            //
2110            //  Decide if we should open a div or a span?
2111            //
2112            String eol = peekAheadLine();
2113
2114            if( eol.trim().length() > 0 )
2115            {
2116                // There is stuff after the class
2117
2118                el = new Element("span");
2119
2120                m_styleStack.push( Boolean.TRUE );
2121            }
2122            else
2123            {
2124                startBlockLevel();
2125                el = new Element("div");
2126                m_styleStack.push( Boolean.FALSE );
2127            }
2128
2129            if( style != null ) el.setAttribute("style", style);
2130            if( clazz != null ) el.setAttribute("class", clazz);
2131            el = pushElement( el );
2132
2133            return el;
2134        }
2135
2136        pushBack(ch);
2137
2138        return el;
2139    }
2140
2141    private Element handleSlash( boolean newLine )
2142        throws IOException
2143    {
2144        int ch = nextToken();
2145
2146        pushBack(ch);
2147        if( ch == '%' && !m_styleStack.isEmpty() )
2148        {
2149            return handleDiv( newLine );
2150        }
2151
2152        return null;
2153    }
2154
2155    private Element handleBar( boolean newLine )
2156        throws IOException
2157    {
2158        Element el = null;
2159
2160        if( !m_istable && !newLine )
2161        {
2162            return null;
2163        }
2164
2165        //
2166        //  If the bar is in the first column, we will either start
2167        //  a new table or continue the old one.
2168        //
2169
2170        if( newLine )
2171        {
2172            if( !m_istable )
2173            {
2174                startBlockLevel();
2175                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2176                m_istable = true;
2177                m_rowNum = 0;
2178            }
2179
2180            m_rowNum++;
2181            Element tr = ( m_rowNum % 2 != 0 )
2182                       ? new Element("tr").setAttribute("class", "odd")
2183                       : new Element("tr");
2184            el = pushElement( tr );
2185        }
2186
2187        //
2188        //  Check out which table cell element to start;
2189        //  a header element (th) or a regular element (td).
2190        //
2191        int ch = nextToken();
2192
2193        if( ch == '|' )
2194        {
2195            if( !newLine )
2196            {
2197                el = popElement("th");
2198                if( el == null ) popElement("td");
2199            }
2200            el = pushElement( new Element("th") );
2201        }
2202        else
2203        {
2204            if( !newLine )
2205            {
2206                el = popElement("td");
2207                if( el == null ) popElement("th");
2208            }
2209
2210            el = pushElement( new Element("td") );
2211
2212            pushBack( ch );
2213        }
2214
2215        return el;
2216    }
2217
2218    /**
2219     *  Generic escape of next character or entity.
2220     */
2221    private Element handleTilde()
2222        throws IOException
2223    {
2224        int ch = nextToken();
2225
2226        if( ch == ' ' )
2227        {
2228            if( m_wysiwygEditorMode )
2229            {
2230                m_plainTextBuf.append( "~ " );
2231            }
2232            return m_currentElement;
2233        }
2234
2235        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2236            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2237            ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2238        {
2239            if( m_wysiwygEditorMode )
2240            {
2241                m_plainTextBuf.append( '~' );
2242            }
2243
2244            m_plainTextBuf.append( (char)ch );
2245            m_plainTextBuf.append(readWhile( ""+(char)ch ));
2246            return m_currentElement;
2247        }
2248
2249        // No escape.
2250        pushBack( ch );
2251
2252        return null;
2253    }
2254
2255    private void fillBuffer( Element startElement )
2256        throws IOException
2257    {
2258        m_currentElement = startElement;
2259
2260        boolean quitReading = false;
2261        m_newLine = true;
2262        disableOutputEscaping();
2263
2264        while(!quitReading)
2265        {
2266            int ch = nextToken();
2267
2268            if( ch == -1 ) break;
2269
2270            //
2271            //  Check if we're actually ending the preformatted mode.
2272            //  We still must do an entity transformation here.
2273            //
2274            if( m_isEscaping )
2275            {
2276                if( ch == '}' )
2277                {
2278                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2279                }
2280                else if( ch == -1 )
2281                {
2282                    quitReading = true;
2283                }
2284                else if( ch == '\r' )
2285                {
2286                    // DOS line feeds we ignore.
2287                }
2288                else if( ch == '<' )
2289                {
2290                    m_plainTextBuf.append( "&lt;" );
2291                }
2292                else if( ch == '>' )
2293                {
2294                    m_plainTextBuf.append( "&gt;" );
2295                }
2296                else if( ch == '&' )
2297                {
2298                    m_plainTextBuf.append( "&amp;" );
2299                }
2300                else if( ch == '~' )
2301                {
2302                    String braces = readWhile("}");
2303                    if( braces.length() >= 3 )
2304                    {
2305                        m_plainTextBuf.append("}}}");
2306
2307                        braces = braces.substring(3);
2308                    }
2309                    else
2310                    {
2311                        m_plainTextBuf.append( (char) ch );
2312                    }
2313
2314                    for( int i = braces.length()-1; i >= 0; i-- )
2315                    {
2316                        pushBack(braces.charAt(i));
2317                    }
2318                }
2319                else
2320                {
2321                    m_plainTextBuf.append( (char) ch );
2322                }
2323
2324                continue;
2325            }
2326
2327            //
2328            //  An empty line stops a list
2329            //
2330            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2331            {
2332                m_plainTextBuf.append(unwindGeneralList());
2333            }
2334
2335            if( m_newLine && ch != '|' && m_istable )
2336            {
2337                popElement("table");
2338                m_istable = false;
2339            }
2340
2341            int skip = IGNORE;
2342
2343            //
2344            //  Do the actual parsing and catch any errors.
2345            //
2346            try
2347            {
2348                skip = parseToken( ch );
2349            }
2350            catch( IllegalDataException e )
2351            {
2352                log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2353
2354                makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2355            }
2356
2357            //
2358            //   The idea is as follows:  If the handler method returns
2359            //   an element (el != null), it is assumed that it has been
2360            //   added in the stack.  Otherwise the character is added
2361            //   as is to the plaintext buffer.
2362            //
2363            //   For the transition phase, if s != null, it also gets
2364            //   added in the plaintext buffer.
2365            //
2366
2367            switch( skip )
2368            {
2369                case ELEMENT:
2370                    m_newLine = false;
2371                    break;
2372
2373                case CHARACTER:
2374                    m_plainTextBuf.append( (char) ch );
2375                    m_newLine = false;
2376                    break;
2377
2378                case IGNORE:
2379                default:
2380                    break;
2381            }
2382        }
2383
2384        closeHeadings();
2385        popElement("domroot");
2386    }
2387
2388    private String cleanupSuspectData( String s )
2389    {
2390        StringBuilder sb = new StringBuilder( s.length() );
2391
2392        for( int i = 0; i < s.length(); i++ )
2393        {
2394            char c = s.charAt(i);
2395
2396            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2397            else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2398        }
2399
2400        return sb.toString();
2401    }
2402
    /**
     *  The token is a plain character. The parsing loop appends it to the
     *  plain text buffer and no longer considers itself at the start of a line.
     */
    protected static final int CHARACTER = 0;

    /**
     *  The token is a wikimarkup element. Nothing is appended to the plain
     *  text buffer, and the parsing loop no longer considers itself at the
     *  start of a line.
     */
    protected static final int ELEMENT   = 1;

    /**
     *  The token is to be ignored. The parsing loop neither appends anything
     *  nor changes its start-of-line state.
     */
    protected static final int IGNORE    = 2;
2411
2412    /**
2413     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
2414     *  you think this was a wiki markup element, and IGNORE, if you think
2415     *  we should ignore this altogether.
2416     *  <p>
2417     *  To add your own MarkupParser, you can override this method, but it
2418     *  is recommended that you call super.parseToken() as well to gain advantage
2419     *  of JSPWiki's own markup.  You can call it at the start of your own
2420     *  parseToken() or end - it does not matter.
2421     *
2422     * @param ch The character under investigation
2423     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2424     * @throws IOException If parsing fails.
2425     */
2426    protected int parseToken( int ch )
2427        throws IOException
2428    {
2429        Element el = null;
2430
2431        //
2432        //  Now, check the incoming token.
2433        //
2434        switch( ch )
2435        {
2436          case '\r':
2437            // DOS linefeeds we forget
2438            return IGNORE;
2439
2440          case '\n':
2441            //
2442            //  Close things like headings, etc.
2443            //
2444
2445            // FIXME: This is not really very fast
2446
2447            closeHeadings();
2448
2449            popElement("dl"); // Close definition lists.
2450            if( m_istable )
2451            {
2452                popElement("tr");
2453            }
2454
2455            m_isdefinition = false;
2456
2457            if( m_newLine )
2458            {
2459                // Paragraph change.
2460                startBlockLevel();
2461
2462                //
2463                //  Figure out which elements cannot be enclosed inside
2464                //  a <p></p> pair according to XHTML rules.
2465                //
2466                String nextLine = peekAheadLine();
2467                if( nextLine.length() == 0 ||
2468                    (nextLine.length() > 0 &&
2469                     !nextLine.startsWith("{{{") &&
2470                     !nextLine.startsWith("----") &&
2471                     !nextLine.startsWith("%%") &&
2472                     "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2473                {
2474                    pushElement( new Element("p") );
2475                    m_isOpenParagraph = true;
2476
2477                    if( m_restartitalic )
2478                    {
2479                        pushElement( new Element("i") );
2480                        m_isitalic = true;
2481                        m_restartitalic = false;
2482                    }
2483                    if( m_restartbold )
2484                    {
2485                        pushElement( new Element("b") );
2486                        m_isbold = true;
2487                        m_restartbold = false;
2488                    }
2489                }
2490            }
2491            else
2492            {
2493                m_plainTextBuf.append("\n");
2494                m_newLine = true;
2495            }
2496            return IGNORE;
2497
2498
2499          case '\\':
2500            el = handleBackslash();
2501            break;
2502
2503          case '_':
2504            el = handleUnderscore();
2505            break;
2506
2507          case '\'':
2508            el = handleApostrophe();
2509            break;
2510
2511          case '{':
2512            el = handleOpenbrace( m_newLine );
2513            break;
2514
2515          case '}':
2516            el = handleClosebrace();
2517            break;
2518
2519          case '-':
2520            if( m_newLine )
2521                el = handleDash();
2522
2523            break;
2524
2525          case '!':
2526            if( m_newLine )
2527            {
2528                el = handleHeading();
2529            }
2530            break;
2531
2532          case ';':
2533            if( m_newLine )
2534            {
2535                el = handleDefinitionList();
2536            }
2537            break;
2538
2539          case ':':
2540            if( m_isdefinition )
2541            {
2542                popElement("dt");
2543                el = pushElement( new Element("dd") );
2544                m_isdefinition = false;
2545            }
2546            break;
2547
2548          case '[':
2549            el = handleOpenbracket();
2550            break;
2551
2552          case '*':
2553            if( m_newLine )
2554            {
2555                pushBack('*');
2556                el = handleGeneralList();
2557            }
2558            break;
2559
2560          case '#':
2561            if( m_newLine )
2562            {
2563                pushBack('#');
2564                el = handleGeneralList();
2565            }
2566            break;
2567
2568          case '|':
2569            el = handleBar( m_newLine );
2570            break;
2571
2572          case '~':
2573            el = handleTilde();
2574            break;
2575
2576          case '%':
2577            el = handleDiv( m_newLine );
2578            break;
2579
2580          case '/':
2581            el = handleSlash( m_newLine );
2582            break;
2583
2584          default:
2585            break;
2586        }
2587
2588        return el != null ? ELEMENT : CHARACTER;
2589    }
2590
2591    private void closeHeadings()
2592    {
2593        if( m_lastHeading != null && !m_wysiwygEditorMode )
2594        {
2595            // Add the hash anchor element at the end of the heading
2596            addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2597            m_lastHeading = null;
2598        }
2599        popElement("h2");
2600        popElement("h3");
2601        popElement("h4");
2602    }
2603
2604    /**
2605     *  Parses the entire document from the Reader given in the constructor or
2606     *  set by {@link #setInputReader(Reader)}.
2607     *
2608     *  @return A WikiDocument, ready to be passed to the renderer.
2609     *  @throws IOException If parsing cannot be accomplished.
2610     */
2611    @Override
2612    public WikiDocument parse()
2613        throws IOException
2614    {
2615        WikiDocument d = new WikiDocument( m_context.getPage() );
2616        d.setContext( m_context );
2617
2618        Element rootElement = new Element("domroot");
2619
2620        d.setRootElement( rootElement );
2621
2622        fillBuffer( rootElement );
2623
2624        paragraphify(rootElement);
2625
2626        return d;
2627    }
2628
2629    /**
2630     *  Checks out that the first paragraph is correctly installed.
2631     *
2632     *  @param rootElement
2633     */
2634    private void paragraphify(Element rootElement)
2635    {
2636        //
2637        //  Add the paragraph tag to the first paragraph
2638        //
2639        List< Content > kids = rootElement.getContent();
2640
2641        if( rootElement.getChild("p") != null )
2642        {
2643            ArrayList<Content> ls = new ArrayList<>();
2644            int idxOfFirstContent = 0;
2645            int count = 0;
2646
2647            for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ )
2648            {
2649                Content c = i.next();
2650                if( c instanceof Element )
2651                {
2652                    String name = ( ( Element )c ).getName();
2653                    if( isBlockLevel( name ) ) break;
2654                }
2655
2656                if( !(c instanceof ProcessingInstruction) )
2657                {
2658                    ls.add( c );
2659                    if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2660                }
2661            }
2662
2663            //
2664            //  If there were any elements, then add a new <p> (unless it would
2665            //  be an empty one)
2666            //
2667            if( ls.size() > 0 )
2668            {
2669                Element newel = new Element("p");
2670
2671                for( Iterator< Content > i = ls.iterator(); i.hasNext(); )
2672                {
2673                    Content c = i.next();
2674
2675                    c.detach();
2676                    newel.addContent(c);
2677                }
2678
2679                //
2680                // Make sure there are no empty <p/> tags added.
2681                //
2682                if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2683                    rootElement.addContent(idxOfFirstContent, newel);
2684            }
2685        }
2686    }
2687
2688}