001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import org.apache.commons.lang3.StringUtils;
022import org.apache.commons.text.StringEscapeUtils;
023import org.apache.logging.log4j.LogManager;
024import org.apache.logging.log4j.Logger;
025import org.apache.oro.text.regex.MalformedPatternException;
026import org.apache.oro.text.regex.MatchResult;
027import org.apache.oro.text.regex.Pattern;
028import org.apache.oro.text.regex.PatternCompiler;
029import org.apache.oro.text.regex.PatternMatcher;
030import org.apache.oro.text.regex.Perl5Compiler;
031import org.apache.oro.text.regex.Perl5Matcher;
032import org.apache.wiki.InternalWikiException;
033import org.apache.wiki.StringTransmutator;
034import org.apache.wiki.api.core.Acl;
035import org.apache.wiki.api.core.Context;
036import org.apache.wiki.api.core.ContextEnum;
037import org.apache.wiki.api.core.Page;
038import org.apache.wiki.api.exceptions.PluginException;
039import org.apache.wiki.api.plugin.Plugin;
040import org.apache.wiki.api.spi.Wiki;
041import org.apache.wiki.attachment.AttachmentManager;
042import org.apache.wiki.auth.AuthorizationManager;
043import org.apache.wiki.auth.UserManager;
044import org.apache.wiki.auth.WikiSecurityException;
045import org.apache.wiki.auth.acl.AclManager;
046import org.apache.wiki.i18n.InternationalizationManager;
047import org.apache.wiki.preferences.Preferences;
048import org.apache.wiki.util.TextUtil;
049import org.apache.wiki.util.XmlUtil;
050import org.apache.wiki.variables.VariableManager;
051import org.jdom2.Attribute;
052import org.jdom2.Content;
053import org.jdom2.Element;
054import org.jdom2.IllegalDataException;
055import org.jdom2.ProcessingInstruction;
056import org.jdom2.Verifier;
057
058import javax.xml.transform.Result;
059import java.io.IOException;
060import java.io.Reader;
061import java.io.StringReader;
062import java.text.MessageFormat;
063import java.util.ArrayList;
064import java.util.Arrays;
065import java.util.Collection;
066import java.util.EmptyStackException;
067import java.util.HashMap;
068import java.util.Iterator;
069import java.util.List;
070import java.util.Map;
071import java.util.Properties;
072import java.util.ResourceBundle;
073import java.util.Stack;
074
075/**
076 *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
077 *  heart and soul of JSPWiki : make sure you test properly anything that is added,
078 *  or else it breaks down horribly.
079 *
080 *  @since  2.4
081 */
082public class JSPWikiMarkupParser extends MarkupParser {
083
    // Link type codes.  makeLink() switches on these values and createAnchor()
    // uses them as an index into CLASS_TYPES to pick the CSS class of the anchor.
    protected static final int              READ          = 0;
    protected static final int              EDIT          = 1;
    protected static final int              EMPTY         = 2;  // Empty message
    protected static final int              LOCAL         = 3;
    protected static final int              LOCALREF      = 4;
    protected static final int              IMAGE         = 5;
    protected static final int              EXTERNAL      = 6;
    protected static final int              INTERWIKI     = 7;
    protected static final int              IMAGELINK     = 8;
    protected static final int              IMAGEWIKILINK = 9;
    protected static final int              ATTACHMENT    = 10;

    private static final Logger log = LogManager.getLogger( JSPWikiMarkupParser.class );

    // Markup state flags.  Of these, only m_isEscaping is read in the code visible
    // here: while it is set, flushPlainText() skips CamelCase/URI auto-linking.
    // NOTE(review): the remaining flags are presumably maintained by the token
    // handlers further down the file - confirm there.
    private boolean        m_isbold;
    private boolean        m_isitalic;
    private boolean        m_istable;
    private boolean        m_isPre;
    private boolean        m_isEscaping;
    private boolean        m_isdefinition;
    private boolean        m_isPreBlock;

    /** Contains style information, in multiple forms. */
    private final Stack<Boolean> m_styleStack   = new Stack<>();

     // general list handling
    private int            m_genlistlevel;
    private final StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
    private final boolean        m_allowPHPWikiStyleLists = true;

    // NOTE(review): presumably tracks whether a paragraph element is currently open - confirm against its users.
    private boolean        m_isOpenParagraph;

    /** Parser for extended link functionality. */
    private final LinkParser     m_linkParser = new LinkParser();

    /** Keeps track of any plain text that gets put in the Text nodes; emptied into m_currentElement by flushPlainText(). */
    private StringBuilder  m_plainTextBuf = new StringBuilder(20);

    /** The element new content is appended to; pushElement() descends into a child, popElement() climbs back to a parent. */
    private Element        m_currentElement;

    /** Keep track of duplicate header names.  */
    private final Map<String, Integer>   m_titleSectionCounter = new HashMap<>();

    /** If true, then considers CamelCase links as well. */
    private boolean                m_camelCaseLinks;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean                m_wysiwygEditorMode;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean                m_plainUris;

    /** If true, all outward links use a small link image. */
    private boolean                m_useOutlinkImage     = true;

    /** If true, makeLink() follows each attachment link with a small image that links to the attachment's info page. */
    private boolean                m_useAttachmentImage  = true;

    /** If true, allows raw HTML. */
    private boolean                m_allowHTML;

    /** If true, makeLink() adds rel="nofollow" to external links. */
    private boolean                m_useRelNofollow;

    /** Compiler used by initialize() to turn WIKIWORD_REGEX into m_camelCasePattern. */
    private final PatternCompiler        m_compiler = new Perl5Compiler();

    /**
     *  Matches either a CamelCase word or a bare http://, https:// or mailto: URI.
     *  Group 1 is the text preceding the match, group 2 the matched word or URI,
     *  group 3 the protocol and group 4 the rest of the URI (groups 3 and 4 are
     *  only set for URI matches).
     */
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    // Matcher and compiled form of WIKIWORD_REGEX, used by flushPlainText().
    private final PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();
    private Pattern                m_camelCasePattern;

    // NOTE(review): not used in the visible part of the file; presumably a table row counter - confirm.
    private int                    m_rowNum              = 1;

    // NOTE(review): not used in the visible part of the file; presumably the most recently parsed heading - confirm.
    private Heading                m_lastHeading;

    /** Engine attribute name under which initialize() caches the compiled CamelCase pattern. */
    private static final String CAMELCASE_PATTERN     = "JSPWikiMarkupParser.camelCasePattern";
159
160    /**
161     *  Creates a markup parser.
162     *
163     *  @param context The WikiContext which controls the parsing
164     *  @param in Where the data is read from.
165     */
166    public JSPWikiMarkupParser( final Context context, final Reader in )
167    {
168        super( context, in );
169        initialize();
170    }
171
172    // FIXME: parsers should be pooled for better performance.
173    private void initialize() {
174        initInlineImagePatterns();
175
176        m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN );
177        if( m_camelCasePattern == null ) {
178            try {
179                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
180            } catch( final MalformedPatternException e ) {
181                log.fatal("Internal error: Someone put in a faulty pattern.",e);
182                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
183            }
184            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
185        }
186
187        //  Set the properties.
188        final Properties props = m_engine.getWikiProperties();
189        final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
190
191        if( cclinks != null ) {
192            m_camelCaseLinks = TextUtil.isPositive( cclinks );
193        } else {
194            m_camelCaseLinks  = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks );
195        }
196
197        final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE );
198        if( wysiwygVariable != null ) {
199            m_wysiwygEditorMode = wysiwygVariable;
200        }
201
202        m_plainUris          = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
203        m_useOutlinkImage    = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
204        m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
205        m_allowHTML          = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
206        m_useRelNofollow     = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
207
208        if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) {
209            disableAccessRules();
210        }
211
212        m_context.getPage().setHasMetadata();
213    }
214
215    /**
216     *  Calls a transmutator chain.
217     *
218     *  @param list Chain to call
219     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
220     *  @return The result of the mutation.
221     */
222    protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) {
223        if( list == null || list.size() == 0 ) {
224            return text;
225        }
226
227        for( final StringTransmutator m : list ) {
228            text = m.mutate( m_context, text );
229        }
230
231        return text;
232    }
233
234    /**
235     * Calls the heading listeners.
236     *
237     * @param param A Heading object.
238     */
239    protected void callHeadingListenerChain( final Heading param ) {
240        for( final HeadingListener h : m_headingListenerChain ) {
241            h.headingAdded( m_context, param );
242        }
243    }
244
245    /**
246     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
247     *  if you really know what you are doing.
248     *
249     *  @param type One of the types above
250     *  @param link URL to which to link to
251     *  @param text Link text
252     *  @param section If a particular section identifier is required.
253     *  @return An A element.
254     *  @since 2.4.78
255     */
256    protected Element createAnchor( final int type, final String link, String text, String section)
257    {
258        text = escapeHTMLEntities( text );
259        section = escapeHTMLEntities( section );
260        final Element el = new Element("a");
261        el.setAttribute("class",CLASS_TYPES[type]);
262        el.setAttribute("href",link+section);
263        el.addContent(text);
264        return el;
265    }
266
267    private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes )
268    {
269        Element el = null;
270
271        if( text == null ) text = link;
272
273        text = callMutatorChain( m_linkMutators, text );
274
275        section = (section != null) ? ("#"+section) : "";
276
277        // Make sure we make a link name that can be accepted
278        // as a valid URL.
279
280        if( link.isEmpty() )
281        {
282            type = EMPTY;
283        }
284        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
285
286        switch(type)
287        {
288            case READ:
289                el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section );
290                break;
291
292            case EDIT:
293                el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" );
294                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
295
296                break;
297
298            case EMPTY:
299                el = new Element("u").addContent(text);
300                break;
301
302                //
303                //  These two are for local references - footnotes and
304                //  references to footnotes.
305                //  We embed the page name (or whatever WikiContext gives us)
306                //  to make sure the links are unique across Wiki.
307                //
308            case LOCALREF:
309                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
310                break;
311
312            case LOCAL:
313                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
314                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
315                el.addContent("["+text+"]");
316                break;
317
318                //
319                //  With the image, external and interwiki types we need to
320                //  make sure nobody can put in Javascript or something else
321                //  annoying into the links themselves.  We do this by preventing
322                //  a haxor from stopping the link name short with quotes in
323                //  fillBuffer().
324                //
325            case IMAGE:
326                el = new Element("img").setAttribute("class","inline");
327                el.setAttribute("src",link);
328                el.setAttribute("alt",text);
329                break;
330
331            case IMAGELINK:
332                el = new Element("img").setAttribute("class","inline");
333                el.setAttribute("src",link);
334                el.setAttribute("alt",text);
335                el = createAnchor(IMAGELINK,text,"","").addContent(el);
336                break;
337
338            case IMAGEWIKILINK:
339                final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text );
340                el = new Element("img").setAttribute("class","inline");
341                el.setAttribute("src",link);
342                el.setAttribute("alt",text);
343                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
344                break;
345
346            case EXTERNAL:
347                el = createAnchor( EXTERNAL, link, text, section );
348                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
349                break;
350
351            case INTERWIKI:
352                el = createAnchor( INTERWIKI, link, text, section );
353                break;
354
355            case ATTACHMENT:
356                final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link );
357                final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link );
358                final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" );
359                el = createAnchor( ATTACHMENT, attlink, text, "" );
360
361                if(  m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) {
362                    el.setAttribute("download", "");
363                }
364
365                pushElement(el);
366                popElement(el.getName());
367
368                if( m_useAttachmentImage )
369                {
370                    el = new Element("img").setAttribute("src",imglink);
371                    el.setAttribute("border","0");
372                    el.setAttribute("alt","(info)");
373
374                    el = new Element("a").setAttribute("href",infolink).addContent(el);
375                    el.setAttribute("class","infolink");
376                }
377                else
378                {
379                    el = null;
380                }
381                break;
382
383            default:
384                break;
385        }
386
387        if( el != null && attributes != null )
388        {
389            while( attributes.hasNext() )
390            {
391                final Attribute attr = attributes.next();
392                if( attr != null )
393                {
394                    el.setAttribute(attr);
395                }
396            }
397        }
398
399        if( el != null )
400        {
401            flushPlainText();
402            m_currentElement.addContent( el );
403        }
404        return el;
405    }
406
407    /**
408     *  These are all of the HTML 4.01 block-level elements.
409     */
410    private static final String[] BLOCK_ELEMENTS = {
411        "address", "blockquote", "div", "dl", "fieldset", "form",
412        "h1", "h2", "h3", "h4", "h5", "h6",
413        "hr", "noscript", "ol", "p", "pre", "table", "ul"
414    };
415
416    private static boolean isBlockLevel( final String name )
417    {
418        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
419    }
420
421    /**
422     *  This method peeks ahead in the stream until EOL and returns the result.
423     *  It will keep the buffers untouched.
424     *
425     *  @return The string from the current position to the end of line.
426     */
427
428    // FIXME: Always returns an empty line, even if the stream is full.
429    private String peekAheadLine()
430        throws IOException
431    {
432        final String s = readUntilEOL().toString();
433
434        if( s.length() > PUSHBACK_BUFFER_SIZE )
435        {
436            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
437            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
438        }
439        else
440        {
441            try
442            {
443                pushBack( s );
444            }
445            catch( final IOException e )
446            {
447                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
448            }
449        }
450        return s;
451    }
452
    /**
     *  Moves the plain text collected in m_plainTextBuf into the current element and
     *  empties the buffer.  Unless raw HTML is allowed, the text is first run through
     *  escapeHTMLEntities().  When CamelCase links are enabled, the parser is not in
     *  escaping mode and the (escaped) text is longer than three characters, every
     *  match of the CamelCase/URI pattern is cut out of the text and turned into a
     *  link; the text around the matches is added as-is.
     *
     *  @return The number of characters that were in the buffer when the method was
     *          called (counted before escaping); 0 means there was nothing to flush.
     */
    private int flushPlainText()
    {
        final int numChars = m_plainTextBuf.length();

        if( numChars > 0 )
        {
            String buf;

            if( !m_allowHTML )
            {
                buf = escapeHTMLEntities(m_plainTextBuf.toString());
            }
            else
            {
                buf = m_plainTextBuf.toString();
            }
            //
            //  We must first empty the buffer because the side effect of
            //  calling makeCamelCaseLink() is to call this routine.
            //

            m_plainTextBuf = new StringBuilder(20);

            try
            {
                //
                //  This is the heaviest part of parsing, and therefore we can
                //  do some optimization here.
                //
                //  1) Only when the length of the buffer is big enough, we try to do the match
                //

                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
                {
                    // Each pass handles the first match in buf and then cuts buf down
                    // to the text that follows the match.
                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
                    {
                        final MatchResult result = m_camelCaseMatcher.getMatch();

                        // Text in front of the match, and the non-alphanumeric run (group 1)
                        // that immediately precedes the word or URI.
                        final String firstPart = buf.substring(0,result.beginOffset(0));
                        String prefix = result.group(1);

                        if( prefix == null ) prefix = "";

                        // Group 2 is the matched word or URI; groups 3 and 4 are the protocol and
                        // the rest of the URI.  "uri" is only meaningful when protocol is not null.
                        final String camelCase = result.group(2);
                        final String protocol  = result.group(3);
                        String uri       = protocol+result.group(4);
                        buf              = buf.substring(result.endOffset(0));

                        m_currentElement.addContent( firstPart );

                        //
                        //  Check if the user does not wish to do URL or WikiWord expansion
                        //
                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
                        {
                            if( prefix.endsWith("~") )
                            {
                                // The tilde itself is dropped from the output, except in
                                // wysiwyg editor mode where it is kept.
                                if( m_wysiwygEditorMode )
                                {
                                    m_currentElement.addContent( "~" );
                                }
                                prefix = prefix.substring(0,prefix.length()-1);
                            }
                            if( camelCase != null )
                            {
                                m_currentElement.addContent( prefix+camelCase );
                            }
                            else if( protocol != null )
                            {
                                m_currentElement.addContent( prefix+uri );
                            }
                            continue;
                        }

                        //
                        //  Fine, then let's check what kind of a link this was
                        //  and emit the proper elements
                        //
                        if( protocol != null )
                        {
                            // A trailing '.' or ',' is taken off the URI and put back in
                            // front of the remaining text.
                            final char c = uri.charAt(uri.length()-1);
                            if( c == '.' || c == ',' )
                            {
                                uri = uri.substring(0,uri.length()-1);
                                buf = c + buf;
                            }
                            m_currentElement.addContent( prefix );
                            makeDirectURILink( uri );
                        }
                        else
                        {
                            m_currentElement.addContent( prefix );

                            makeCamelCaseLink( camelCase );
                        }
                    }

                    // Whatever is left after the last match.
                    m_currentElement.addContent( buf );
                }
                else
                {
                    //
                    //  No camelcase asked for, just add the elements
                    //
                    m_currentElement.addContent( buf );
                }
            }
            catch( final IllegalDataException e )
            {
                //
                // Sometimes it's possible that illegal XML chars is added to the data.
                // Here we make sure it does not stop parsing.
                //
                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
            }
        }

        return numChars;
    }
578
579    /**
580     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
581     *  entities that are already escaped).
582     *
583     *  @param buf
584     *  @return An escaped string.
585     */
586    private String escapeHTMLEntities( final String buf)
587    {
588        final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
589
590        for( int i = 0; i < buf.length(); i++ )
591        {
592            final char ch = buf.charAt(i);
593
594            if( ch == '<' )
595            {
596                tmpBuf.append("&lt;");
597            }
598            else if( ch == '>' )
599            {
600                tmpBuf.append("&gt;");
601            }
602            else if( ch == '\"' )
603            {
604                tmpBuf.append("&quot;");
605            }
606            else if( ch == '&' )
607            {
608                //
609                //  If the following is an XML entity reference (&#.*;) we'll
610                //  leave it as it is; otherwise we'll replace it with an &amp;
611                //
612
613                boolean isEntity = false;
614                final StringBuilder entityBuf = new StringBuilder();
615
616                if( i < buf.length() -1 )
617                {
618                    for( int j = i; j < buf.length(); j++ )
619                    {
620                        final char ch2 = buf.charAt(j);
621
622                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
623                        {
624                            entityBuf.append(ch2);
625
626                            if( ch2 == ';' )
627                            {
628                                isEntity = true;
629                                break;
630                            }
631                        }
632                        else
633                        {
634                            break;
635                        }
636                    }
637                }
638
639                if( isEntity )
640                {
641                    tmpBuf.append( entityBuf );
642                    i = i + entityBuf.length() - 1;
643                }
644                else
645                {
646                    tmpBuf.append("&amp;");
647                }
648
649            }
650            else
651            {
652                tmpBuf.append( ch );
653            }
654        }
655
656        return tmpBuf.toString();
657    }
658
659    private Element pushElement( final Element e )
660    {
661        flushPlainText();
662        m_currentElement.addContent( e );
663        m_currentElement = e;
664
665        return e;
666    }
667
668    private Element addElement( final Content e )
669    {
670        if( e != null )
671        {
672            flushPlainText();
673            m_currentElement.addContent( e );
674        }
675        return m_currentElement;
676    }
677
678    /**
679     *  All elements that can be empty by the HTML DTD.
680     */
681    //  Keep sorted.
682    private static final String[] EMPTY_ELEMENTS = {
683        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
684    };
685
686    /**
687     *  Goes through the current element stack and pops all elements until this
688     *  element is found - this essentially "closes" and element.
689     *
690     *  @param s
691     *  @return The new current element, or null, if there was no such element in the entire stack.
692     */
693    private Element popElement( final String s )
694    {
695        final int flushedBytes = flushPlainText();
696
697        Element currEl = m_currentElement;
698
699        while( currEl.getParentElement() != null )
700        {
701            if( currEl.getName().equals(s) && !currEl.isRootElement() )
702            {
703                m_currentElement = currEl.getParentElement();
704
705                //
706                //  Check if it's okay for this element to be empty.  Then we will
707                //  trick the JDOM generator into not generating an empty element,
708                //  by putting an empty string between the tags.  Yes, it's a kludge
709                //  but what'cha gonna do about it. :-)
710                //
711
712                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
713                {
714                    currEl.addContent("");
715                }
716
717                return m_currentElement;
718            }
719
720            currEl = currEl.getParentElement();
721        }
722
723        return null;
724    }
725
726
727    /**
728     *  Reads the stream until it meets one of the specified
729     *  ending characters, or stream end.  The ending character will be left
730     *  in the stream.
731     */
732    private String readUntil( final String endChars )
733        throws IOException
734    {
735        final StringBuilder sb = new StringBuilder( 80 );
736        int ch = nextToken();
737
738        while( ch != -1 )
739        {
740            if( ch == '\\' )
741            {
742                ch = nextToken();
743                if( ch == -1 )
744                {
745                    break;
746                }
747            }
748            else
749            {
750                if( endChars.indexOf((char)ch) != -1 )
751                {
752                    pushBack( ch );
753                    break;
754                }
755            }
756            sb.append( (char) ch );
757            ch = nextToken();
758        }
759
760        return sb.toString();
761    }
762
763    /**
764     *  Reads the stream while the characters that have been specified are
765     *  in the stream, returning then the result as a String.
766     */
767    private String readWhile( final String endChars )
768        throws IOException
769    {
770        final StringBuilder sb = new StringBuilder( 80 );
771        int ch = nextToken();
772
773        while( ch != -1 )
774        {
775            if( endChars.indexOf((char)ch) == -1 )
776            {
777                pushBack( ch );
778                break;
779            }
780
781            sb.append( (char) ch );
782            ch = nextToken();
783        }
784
785        return sb.toString();
786    }
787
788    private JSPWikiMarkupParser m_cleanTranslator;
789
790    /**
791     *  Does a lazy init.  Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before
792     *  the TranslatorReader it is contained by is up.
793     */
794    private JSPWikiMarkupParser getCleanTranslator() {
795        if( m_cleanTranslator == null ) {
796            final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() );
797            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
798            m_cleanTranslator.m_allowHTML = true;
799        }
800
801        return m_cleanTranslator;
802    }
803    /**
804     *  Modifies the "hd" parameter to contain proper values.  Because
805     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
806     *  % after url encoding with '_'.
807     *  <p>
808     *  Counts also duplicate headings (= headings with similar name), and
809     *  attaches a counter.
810     */
811    private String makeHeadingAnchor( final String baseName, String title, final Heading hd ) {
812        hd.m_titleText = title;
813        title = MarkupParser.wikifyLink( title );
814        hd.m_titleSection = m_engine.encodeName(title);
815
816        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) {
817            final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1;
818            m_titleSectionCounter.put( hd.m_titleSection, count );
819            hd.m_titleSection += "-" + count;
820        } else {
821            m_titleSectionCounter.put( hd.m_titleSection, 1 );
822        }
823
824        hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection;
825        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
826        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
827
828        return hd.m_titleAnchor;
829    }
830
831    private String makeSectionTitle( String title ) {
832        title = title.trim();
833        try {
834            final JSPWikiMarkupParser dtr = getCleanTranslator();
835            dtr.setInputReader( new StringReader( title ) );
836            final WikiDocument doc = dtr.parse();
837            doc.setContext( m_context );
838
839            return XmlUtil.extractTextFromDocument( doc );
840        } catch( final IOException e ) {
841            log.fatal("Title parsing not working", e );
842            throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e );
843        }
844    }
845
846    /**
847     *  Returns XHTML for the heading.
848     *
849     *  @param level The level of the heading.  @see Heading
850     *  @param title the title for the heading
851     *  @param hd a List to which heading should be added
852     *  @return An Element containing the heading
853     */
854    public Element makeHeading( final int level, final String title, final Heading hd ) {
855        final Element el;
856        final String pageName = m_context.getPage().getName();
857        final String outTitle = makeSectionTitle( title );
858        hd.m_level = level;
859
860        switch( level ) {
861          case Heading.HEADING_SMALL:
862            el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
863            break;
864
865          case Heading.HEADING_MEDIUM:
866            el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
867            break;
868
869          case Heading.HEADING_LARGE:
870            el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
871            break;
872
873          default:
874            throw new InternalWikiException( "Illegal heading type " + level );
875        }
876
877        return el;
878    }
879
880    /**
881     *  When given a link to a WikiName, we just return
882     *  a proper HTML link for it.  The local link mutator
883     *  chain is also called.
884     */
885    private Element makeCamelCaseLink( final String wikiname )
886    {
887        final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
888
889        callMutatorChain( m_localLinkMutatorChain, wikiname );
890
891        if( matchedLink != null ) {
892            makeLink( READ, matchedLink, wikiname, null, null );
893        } else {
894            makeLink( EDIT, wikiname, wikiname, null, null );
895        }
896
897        return m_currentElement;
898    }
899
900    /** Holds the image URL for the duration of this parser */
901    private String m_outlinkImageURL;
902
903    /**
904     *  Returns an element for the external link image (out.png).  However,
905     *  this method caches the URL for the lifetime of this MarkupParser,
906     *  because it's commonly used, and we'll end up with possibly hundreds
907     *  our thousands of references to it...  It's a lot faster, too.
908     *
909     *  @return  An element containing the HTML for the outlink image.
910     */
911    private Element outlinkImage()
912    {
913        Element el = null;
914
915        if( m_useOutlinkImage )
916        {
917            if( m_outlinkImageURL == null )
918            {
919                m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE );
920            }
921
922            el = new Element( "img" ).setAttribute( "class", OUTLINK );
923            el.setAttribute( "src", m_outlinkImageURL );
924            el.setAttribute( "alt","" );
925        }
926
927        return el;
928    }
929
930    /**
931     *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
932     *  because of the way that flushPlainText() works, it already encodes
933     *  all of the XML entities.  But so does WikiContext.getURL(), so we
934     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
935     *  <p>
936     *  What a crappy problem.
937     *
938     * @param url
939     * @return An anchor Element containing the link.
940     */
941    private Element makeDirectURILink( String url ) {
942        final Element result;
943        String last = null;
944
945        if( url.endsWith( "," ) || url.endsWith( "." ) ) {
946            last = url.substring( url.length() - 1 );
947            url = url.substring( 0, url.length() - 1 );
948        }
949
950        callMutatorChain( m_externalLinkMutatorChain, url );
951
952        if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) {
953            result = handleImageLink( StringUtils.replace( url, "&amp;", "&" ), url, false );
954        } else {
955            result = makeLink( EXTERNAL, StringUtils.replace( url, "&amp;", "&" ), url, null, null );
956            addElement( outlinkImage() );
957        }
958
959        if( last != null ) {
960            m_plainTextBuf.append( last );
961        }
962
963        return result;
964    }
965
966    /**
967     *  Image links are handled differently:
968     *  1. If the text is a WikiName of an existing page,
969     *     it gets linked.
970     *  2. If the text is an external link, then it is inlined.
971     *  3. Otherwise it becomes an ALT text.
972     *
973     *  @param reallink The link to the image.
974     *  @param link     Link text portion, may be a link to somewhere else.
975     *  @param hasLinkText If true, then the defined link had a link text available.
976     *                  This means that the link text may be a link to a wiki page,
977     *                  or an external resource.
978     */
979
980    // FIXME: isExternalLink() is called twice.
981    private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText )
982    {
983        final String possiblePage = MarkupParser.cleanLink( link );
984
985        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText )
986        {
987            return makeLink( IMAGELINK, reallink, link, null, null );
988        }
989        else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText )
990        {
991            // System.out.println("Orig="+link+", Matched: "+matchedLink);
992            callMutatorChain( m_localLinkMutatorChain, possiblePage );
993
994            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
995        }
996        else
997        {
998            return makeLink( IMAGE, reallink, link, null, null );
999        }
1000    }
1001
1002    private Element handleAccessRule( String ruleLine ) {
1003        if( m_wysiwygEditorMode ) {
1004            m_currentElement.addContent( "[" + ruleLine + "]" );
1005        }
1006
1007        if( !m_parseAccessRules ) {
1008            return m_currentElement;
1009        }
1010        final Page page = m_context.getRealPage();
1011        // UserDatabase db = m_context.getEngine().getUserDatabase();
1012
1013        if( ruleLine.startsWith( "{" ) ) {
1014            ruleLine = ruleLine.substring( 1 );
1015        }
1016
1017        if( ruleLine.endsWith( "}" ) ) {
1018            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1019        }
1020
1021        log.debug("page={}, ACL = {}", page.getName(), ruleLine);
1022
1023        try {
1024            final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine );
1025            page.setAcl( acl );
1026            log.debug( acl.toString() );
1027        } catch( final WikiSecurityException wse ) {
1028            return makeError( wse.getMessage() );
1029        }
1030
1031        return m_currentElement;
1032    }
1033
1034    /**
1035     *  Handles metadata setting [{SET foo=bar}]
1036     */
1037    private Element handleMetadata( final String link ) {
1038        if( m_wysiwygEditorMode ) {
1039            m_currentElement.addContent( "[" + link + "]" );
1040        }
1041
1042        try {
1043            final String args = link.substring( link.indexOf(' '), link.length()-1 );
1044            final String name = args.substring( 0, args.indexOf('=') ).trim();
1045            String val  = args.substring( args.indexOf('=')+1 ).trim();
1046
1047            if( val.startsWith("'") ) {
1048                val = val.substring( 1 );
1049            }
1050            if( val.endsWith("'") ) {
1051                val = val.substring( 0, val.length()-1 );
1052            }
1053
1054            // log.debug("SET name='"+name+"', value='"+val+"'.");
1055
1056            if( !name.isEmpty() && !val.isEmpty() ) {
1057                val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val );
1058                m_context.getPage().setAttribute( name, val );
1059            }
1060        } catch( final Exception e ) {
1061            final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1062            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1063        }
1064
1065        return m_currentElement;
1066    }
1067
1068    /**
1069     *  Emits a processing instruction that will disable markup escaping. This is
1070     *  very useful if you want to emit HTML directly into the stream.
1071     *
1072     */
1073    private void disableOutputEscaping() {
1074        addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) );
1075    }
1076
1077    /**
1078     *  Gobbles up all hyperlinks that are encased in square brackets.
1079     */
1080    private Element handleHyperlinks( String linktext, final int pos ) {
1081        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1082        final StringBuilder sb = new StringBuilder( linktext.length() + 80 );
1083
1084        if( m_linkParsingOperations.isAccessRule( linktext ) ) {
1085            return handleAccessRule( linktext );
1086        }
1087
1088        if( m_linkParsingOperations.isMetadata( linktext ) ) {
1089            return handleMetadata( linktext );
1090        }
1091
1092        if( m_linkParsingOperations.isPluginLink( linktext ) ) {
1093            try {
1094                final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1095
1096                // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not.
1097                if( pluginContent != null ) {
1098                    addElement( pluginContent );
1099                    pluginContent.executeParse( m_context );
1100                }
1101            } catch( final PluginException e ) {
1102                log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
1103                //log.info( "Root cause:",e.getRootThrowable() );
1104                if( !m_wysiwygEditorMode ) {
1105                    final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE );
1106                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
1107                                                                        m_context.getRealPage().getWiki(),
1108                                                                        m_context.getRealPage().getName(),
1109                                                                        e.getMessage() ) ) );
1110                }
1111            }
1112
1113            return m_currentElement;
1114        }
1115
1116        try {
1117            final LinkParser.Link link = m_linkParser.parse( linktext );
1118            linktext = link.getText();
1119            String linkref = link.getReference();
1120
1121            //
1122            //  Yes, we now have the components separated.
1123            //  linktext = the text the link should have
1124            //  linkref  = the url or page name.
1125            //
1126            //  In many cases these are the same.  [linktext|linkref].
1127            //
1128            if( m_linkParsingOperations.isVariableLink( linktext ) ) {
1129                final Content el = new VariableContent( linktext );
1130
1131                addElement( el );
1132            } else if( m_linkParsingOperations.isExternalLink( linkref ) ) {
1133                // It's an external link, out of this Wiki
1134
1135                callMutatorChain( m_externalLinkMutatorChain, linkref );
1136
1137                if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
1138                    handleImageLink( linkref, linktext, link.hasReference() );
1139                } else {
1140                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1141                    addElement( outlinkImage() );
1142                }
1143            } else if( link.isInterwikiLink() ) {
1144                // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved.
1145
1146                // FIXME: There is an interesting issue here:  We probably should
1147                //        URLEncode the wikiPage, but we can't since some of the
1148                //        Wikis use slashes (/), which won't survive URLEncoding.
1149                //        Besides, we don't know which character set the other Wiki
1150                //        is using, so you'll have to write the entire name as it appears
1151                //        in the URL.  Bugger.
1152
1153                final String extWiki = link.getExternalWiki();
1154                final String wikiPage = link.getExternalWikiPage();
1155
1156                if( m_wysiwygEditorMode ) {
1157                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1158                } else {
1159                    String urlReference = m_engine.getInterWikiURL( extWiki );
1160
1161                    if( urlReference != null ) {
1162                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1163                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1164
1165                        if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) {
1166                            handleImageLink( urlReference, linktext, link.hasReference() );
1167                        } else {
1168                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1169                        }
1170
1171                        if( m_linkParsingOperations.isExternalLink( urlReference ) ) {
1172                            addElement( outlinkImage() );
1173                        }
1174                    } else {
1175                        final Object[] args = { escapeHTMLEntities( extWiki ) };
1176
1177                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1178                    }
1179                }
1180            } else if( linkref.startsWith( "#" ) ) {
1181                // It defines a local footnote
1182                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1183            } else if( TextUtil.isNumber( linkref ) ) {
1184                // It defines a reference to a local footnote
1185                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1186            } else {
1187                final int hashMark;
1188
1189                // Internal wiki link, but is it an attachment link?
1190                String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref );
1191                if( attachment != null ) {
1192                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1193
1194                    if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
1195                        attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment );
1196                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1197                    } else {
1198                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1199                    }
1200                } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) {
1201                    // It's an internal Wiki link, but to a named section
1202
1203                    final String namedSection = linkref.substring( hashMark + 1 );
1204                    linkref = linkref.substring( 0, hashMark );
1205
1206                    linkref = MarkupParser.cleanLink( linkref );
1207
1208                    callMutatorChain( m_localLinkMutatorChain, linkref );
1209
1210                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1211                    if( matchedLink != null ) {
1212                        String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) );
1213                        sectref = sectref.replace( '%', '_' );
1214                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1215                    } else {
1216                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1217                    }
1218                } else {
1219                    // It's an internal Wiki link
1220                    linkref = MarkupParser.cleanLink( linkref );
1221
1222                    callMutatorChain( m_localLinkMutatorChain, linkref );
1223
1224                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1225                    if( matchedLink != null ) {
1226                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1227                    } else {
1228                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1229                    }
1230                }
1231            }
1232
1233    } catch( final ParseException e ) {
1234        log.info( "Parser failure: ", e );
1235        final Object[] args = { e.getMessage() };
1236        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1237    }
1238        return m_currentElement;
1239    }
1240
1241    /**
1242     *  Pushes back any string that has been read.  It will obviously
1243     *  be pushed back in a reverse order.
1244     *
1245     *  @since 2.1.77
1246     */
1247    private void pushBack( final String s )
1248        throws IOException
1249    {
1250        for( int i = s.length()-1; i >= 0; i-- )
1251        {
1252            pushBack( s.charAt(i) );
1253        }
1254    }
1255
1256    private Element handleBackslash()
1257        throws IOException
1258    {
1259        final int ch = nextToken();
1260
1261        if( ch == '\\' )
1262        {
1263            final int ch2 = nextToken();
1264
1265            if( ch2 == '\\' )
1266            {
1267                pushElement( new Element("br").setAttribute("clear","all"));
1268                return popElement("br");
1269            }
1270
1271            pushBack( ch2 );
1272
1273            pushElement( new Element("br") );
1274            return popElement("br");
1275        }
1276
1277        pushBack( ch );
1278
1279        return null;
1280    }
1281
1282    private Element handleUnderscore()
1283        throws IOException
1284    {
1285        final int ch = nextToken();
1286        Element el = null;
1287
1288        if( ch == '_' )
1289        {
1290            if( m_isbold )
1291            {
1292                el = popElement("b");
1293            }
1294            else
1295            {
1296                el = pushElement( new Element("b") );
1297            }
1298            m_isbold = !m_isbold;
1299        }
1300        else
1301        {
1302            pushBack( ch );
1303        }
1304
1305        return el;
1306    }
1307
1308
1309    /**
1310     *  For example: italics.
1311     */
1312    private Element handleApostrophe()
1313        throws IOException
1314    {
1315        final int ch = nextToken();
1316        Element el = null;
1317
1318        if( ch == '\'' )
1319        {
1320            if( m_isitalic )
1321            {
1322                el = popElement("i");
1323            }
1324            else
1325            {
1326                el = pushElement( new Element("i") );
1327            }
1328            m_isitalic = !m_isitalic;
1329        }
1330        else
1331        {
1332            pushBack( ch );
1333        }
1334
1335        return el;
1336    }
1337
1338    private Element handleOpenbrace( final boolean isBlock )
1339        throws IOException
1340    {
1341        final int ch = nextToken();
1342
1343        if( ch == '{' )
1344        {
1345            final int ch2 = nextToken();
1346
1347            if( ch2 == '{' )
1348            {
1349                m_isPre = true;
1350                m_isEscaping = true;
1351                m_isPreBlock = isBlock;
1352
1353                if( isBlock )
1354                {
1355                    startBlockLevel();
1356                    return pushElement( new Element("pre") );
1357                }
1358
1359                return pushElement( new Element("span").setAttribute("class","inline-code") );
1360            }
1361
1362            pushBack( ch2 );
1363
1364            return pushElement( new Element("tt") );
1365        }
1366
1367        pushBack( ch );
1368
1369        return null;
1370    }
1371
1372    /**
1373     *  Handles both }} and }}}
1374     */
1375    private Element handleClosebrace()
1376        throws IOException
1377    {
1378        final int ch2 = nextToken();
1379
1380        if( ch2 == '}' )
1381        {
1382            final int ch3 = nextToken();
1383
1384            if( ch3 == '}' )
1385            {
1386                if( m_isPre )
1387                {
1388                    if( m_isPreBlock )
1389                    {
1390                        popElement( "pre" );
1391                    }
1392                    else
1393                    {
1394                        popElement( "span" );
1395                    }
1396
1397                    m_isPre = false;
1398                    m_isEscaping = false;
1399                    return m_currentElement;
1400                }
1401
1402                m_plainTextBuf.append("}}}");
1403                return m_currentElement;
1404            }
1405
1406            pushBack( ch3 );
1407
1408            if( !m_isEscaping )
1409            {
1410                return popElement("tt");
1411            }
1412        }
1413
1414        pushBack( ch2 );
1415
1416        return null;
1417    }
1418
1419    private Element handleDash()
1420        throws IOException
1421    {
1422        int ch = nextToken();
1423
1424        if( ch == '-' )
1425        {
1426            final int ch2 = nextToken();
1427
1428            if( ch2 == '-' )
1429            {
1430                final int ch3 = nextToken();
1431
1432                if( ch3 == '-' )
1433                {
1434                    // Empty away all the rest of the dashes.
1435                    // Do not forget to return the first non-match back.
1436                    do
1437                    {
1438                        ch = nextToken();
1439                    }
1440                    while ( ch == '-' );
1441
1442                    pushBack(ch);
1443                    startBlockLevel();
1444                    pushElement( new Element("hr") );
1445                    return popElement( "hr" );
1446                }
1447
1448                pushBack( ch3 );
1449            }
1450            pushBack( ch2 );
1451        }
1452
1453        pushBack( ch );
1454
1455        return null;
1456    }
1457
1458    private Element handleHeading()
1459        throws IOException
1460    {
1461        Element el = null;
1462
1463        final int ch  = nextToken();
1464
1465        final Heading hd = new Heading();
1466
1467        if( ch == '!' )
1468        {
1469            final int ch2 = nextToken();
1470
1471            if( ch2 == '!' )
1472            {
1473                final String title = peekAheadLine();
1474
1475                el = makeHeading( Heading.HEADING_LARGE, title, hd);
1476            }
1477            else
1478            {
1479                pushBack( ch2 );
1480                final String title = peekAheadLine();
1481                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1482            }
1483        }
1484        else
1485        {
1486            pushBack( ch );
1487            final String title = peekAheadLine();
1488            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1489        }
1490
1491        callHeadingListenerChain( hd );
1492
1493        m_lastHeading = hd;
1494
1495        if( el != null ) pushElement(el);
1496
1497        return el;
1498    }
1499
1500    /**
1501     *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1502     *  EOL from the stream.
1503     */
1504    private StringBuilder readUntilEOL()
1505        throws IOException
1506    {
1507        int ch;
1508        final StringBuilder buf = new StringBuilder( 256 );
1509
1510        while( true )
1511        {
1512            ch = nextToken();
1513
1514            if( ch == -1 )
1515                break;
1516
1517            buf.append( (char) ch );
1518
1519            if( ch == '\n' )
1520                break;
1521        }
1522        return buf;
1523    }
1524
1525    /** Controls whether italic is restarted after a paragraph shift */
1526
1527    private boolean m_restartitalic;
1528    private boolean m_restartbold;
1529
1530    private boolean m_newLine;
1531
1532    /**
1533     *  Starts a block level element, therefore closing
1534     *  a potential open paragraph tag.
1535     */
1536    private void startBlockLevel()
1537    {
1538        // These may not continue over block level limits in XHTML
1539
1540        popElement("i");
1541        popElement("b");
1542        popElement("tt");
1543
1544        if( m_isOpenParagraph )
1545        {
1546            m_isOpenParagraph = false;
1547            popElement("p");
1548            m_plainTextBuf.append("\n"); // Just small beautification
1549        }
1550
1551        m_restartitalic = m_isitalic;
1552        m_restartbold   = m_isbold;
1553
1554        m_isitalic = false;
1555        m_isbold   = false;
1556    }
1557
1558    private static String getListType( final char c )
1559    {
1560        if( c == '*' )
1561        {
1562            return "ul";
1563        }
1564        else if( c == '#' )
1565        {
1566            return "ol";
1567        }
1568        throw new InternalWikiException("Parser got faulty list type: "+c);
1569    }
1570    /**
1571     *  Like original handleOrderedList() and handleUnorderedList()
1572     *  however handles both ordered ('#') and unordered ('*') mixed together.
1573     */
1574
1575    // FIXME: Refactor this; it's a bit messy.
1576
1577    private Element handleGeneralList()
1578        throws IOException
1579    {
1580         startBlockLevel();
1581
1582         String strBullets = readWhile( "*#" );
1583         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1584         final int numBullets = strBullets.length();
1585
1586         // override the beginning portion of bullet pattern to be like the previous
1587         // to simulate PHPWiki style lists
1588
1589         if(m_allowPHPWikiStyleLists)
1590         {
1591             // only substitute if different
1592             if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1593                   (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
1594             {
1595                 if(numBullets <= m_genlistlevel)
1596                 {
1597                     // Substitute all but the last character (keep the expressed bullet preference)
1598                     strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
1599                                   + strBullets.charAt( numBullets-1 );
1600                 }
1601                 else
1602                 {
1603                     strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
1604                 }
1605             }
1606         }
1607
1608         //
1609         //  Check if this is still of the same type
1610         //
1611         if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1612            (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
1613         {
1614             if( numBullets > m_genlistlevel )
1615             {
1616                 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );
1617
1618                 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
1619                 {
1620                     // bullets are growing, get from new bullet list
1621                     pushElement( new Element("li") );
1622                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
1623                 }
1624             }
1625             else if( numBullets < m_genlistlevel )
1626             {
1627                 //  Close the previous list item.
1628                 // buf.append( m_renderer.closeListItem() );
1629                 popElement( "li" );
1630
1631                 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
1632                 {
1633                     // bullets are shrinking, get from old bullet list
1634
1635                     popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1636                     if( m_genlistlevel > 0 )
1637                     {
1638                         popElement( "li" );
1639                     }
1640
1641                 }
1642             }
1643             else
1644             {
1645                 if( m_genlistlevel > 0 )
1646                 {
1647                     popElement( "li" );
1648                 }
1649             }
1650         }
1651         else
1652         {
1653             //
1654             //  The pattern has changed, unwind and restart
1655             //
1656             int  numEqualBullets;
1657             final int  numCheckBullets;
1658
1659             // find out how much is the same
1660             numEqualBullets = 0;
1661             numCheckBullets = Math.min(numBullets,m_genlistlevel);
1662
1663             while( numEqualBullets < numCheckBullets )
1664             {
1665                 // if the bullets are equal so far, keep going
1666                 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
1667                     numEqualBullets++;
1668                 // otherwise giveup, we have found how many are equal
1669                 else
1670                     break;
1671             }
1672
1673             //unwind
1674             for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
1675             {
1676                 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
1677                 if( m_genlistlevel > numBullets )
1678                 {
1679                     popElement("li");
1680                 }
1681             }
1682
1683             //rewind
1684
1685             pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
1686             for(int i = numEqualBullets; i < numBullets; i++)
1687             {
1688                 pushElement( new Element("li") );
1689                 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
1690             }
1691             m_genlistlevel = numBullets;
1692         }
1693
1694         //
1695         //  Push a new list item, and eat away any extra whitespace
1696         //
1697         pushElement( new Element("li") );
1698         readWhile(" ");
1699
1700         // work done, remember the new bullet list (in place of old one)
1701         m_genlistBulletBuffer.setLength(0);
1702         m_genlistBulletBuffer.append(strBullets);
1703
1704         return m_currentElement;
1705    }
1706
1707    private Element unwindGeneralList()
1708    {
1709        //unwind
1710        for( ; m_genlistlevel > 0; m_genlistlevel-- )
1711        {
1712            popElement( "li" );
1713            popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1714        }
1715
1716        m_genlistBulletBuffer.setLength(0);
1717
1718        return null;
1719    }
1720
1721
1722    private Element handleDefinitionList()
1723        throws IOException
1724    {
1725        if( !m_isdefinition )
1726        {
1727            m_isdefinition = true;
1728
1729            startBlockLevel();
1730
1731            pushElement( new Element("dl") );
1732            return pushElement( new Element("dt") );
1733        }
1734
1735        return null;
1736    }
1737
    /**
     *  Handles an opening square bracket.  Three outcomes are possible:
     *  <ul>
     *    <li>The bracket is followed by one or more further {@code '['} characters: the
     *        collected brackets are appended to the plain text buffer and no link is parsed.</li>
     *    <li>The input ends before the link/plugin body is closed: the collected text is
     *        appended to the plain text buffer and flushed while escaping is switched on.</li>
     *    <li>Otherwise the text between the brackets is handed to {@code handleHyperlinks()}.</li>
     *  </ul>
     *
     *  @return the current element in the first two cases, otherwise whatever
     *          {@code handleHyperlinks()} returns
     *  @throws IOException if reading the input fails
     */
    private Element handleOpenbracket()
        throws IOException
    {
        final StringBuilder sb = new StringBuilder(40);
        // Remember where the link started; passed on to handleHyperlinks() below.
        final int pos = getPosition();
        int ch = nextToken();
        boolean isPlugin = false;

        // A second '[' right after the first one: collect the whole run of brackets.
        if( ch == '[' )
        {
            if( m_wysiwygEditorMode )
            {
                // In WYSIWYG editor mode one additional bracket is emitted.
                sb.append( '[' );
            }

            sb.append( (char)ch );

            while( (ch = nextToken()) == '[' )
            {
                sb.append( (char)ch );
            }
        }


        // "[{" introduces a plugin body, which may contain nested "[{ ... }]" pairs.
        if( ch == '{' )
        {
            isPlugin = true;
        }

        // The token inspected above is not consumed here.
        pushBack( ch );

        // Something was collected, i.e. this was a bracket run: emit it as plain text.
        if( sb.length() > 0 )
        {
            m_plainTextBuf.append( sb );
            return m_currentElement;
        }

        //
        //  Find end of hyperlink
        //

        ch = nextToken();
        int nesting = 1;    // Check for nested plugins

        while( ch != -1 )
        {
            // One token of lookahead; it is pushed back immediately so it is read again as ch.
            final int ch2 = nextToken(); pushBack(ch2);

            if( isPlugin )
            {
                if( ch == '[' && ch2 == '{' )
                {
                    nesting++;
                }
                // Only reachable after a "}]" pair has lowered nesting to 0, so sb is non-empty here.
                else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
                {
                    break;
                }
                else if( ch == '}' && ch2 == ']' )
                {
                    // NB: This will be decremented once at the end
                    nesting--;
                }
            }
            else
            {
                // A plain link ends at the first closing bracket.
                if( ch == ']' )
                {
                    break;
                }
            }

            sb.append( (char) ch );

            ch = nextToken();
        }

        //
        //  If the link is never finished, do some tricks to display the rest of the line
        //  unchanged.
        //
        if( ch == -1 )
        {
            log.debug("Warning: unterminated link detected!");
            // Escaping is enabled only for the duration of the flush.
            m_isEscaping = true;
            m_plainTextBuf.append( sb );
            flushPlainText();
            m_isEscaping = false;
            return m_currentElement;
        }

        return handleHyperlinks( sb.toString(), pos );
    }
1831
1832    /**
1833     *  Reads the stream until the current brace is closed or stream end.
1834     */
1835    private String readBraceContent( final char opening, final char closing )
1836        throws IOException
1837    {
1838        final StringBuilder sb = new StringBuilder(40);
1839        int braceLevel = 1;
1840        int ch;
1841        while(( ch = nextToken() ) != -1 )
1842        {
1843            if( ch == '\\' )
1844            {
1845                continue;
1846            }
1847            else if ( ch == opening )
1848            {
1849                braceLevel++;
1850            }
1851            else if ( ch == closing )
1852            {
1853                braceLevel--;
1854                if (braceLevel==0)
1855                {
1856                  break;
1857                }
1858            }
1859            sb.append( (char)ch );
1860        }
1861        return sb.toString();
1862    }
1863
1864
1865    /**
1866     *  Handles constructs of type %%(style) and %%class
1867     * @param newLine
1868     * @return An Element containing the div or span, depending on the situation.
1869     * @throws IOException
1870     */
1871    private Element handleDiv( final boolean newLine )
1872        throws IOException
1873    {
1874        int ch = nextToken();
1875        Element el = null;
1876
1877        if( ch == '%' )
1878        {
1879            String style = null;
1880            String clazz = null;
1881
1882            ch = nextToken();
1883
1884            //
1885            //  Style or class?
1886            //
1887            if( ch == '(' )
1888            {
1889                style = readBraceContent('(',')');
1890            }
1891            else if( Character.isLetter( (char) ch ) )
1892            {
1893                pushBack( ch );
1894                clazz = readUntil( "( \t\n\r" );
1895                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
1896                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
1897                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
1898                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
1899
1900                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
1901                //(2) remove any invalid character
1902                if( clazz != null){
1903
1904                    clazz = clazz.replace('.', ' ')
1905                                 .replaceAll("[^\\s-_\\w\\x200-\\x377]+","");
1906
1907                }
1908                ch = nextToken();
1909
1910                //check for %%class1.class2( style information )
1911                if( ch == '(' )
1912                {
1913                    style = readBraceContent('(',')');
1914                }
1915                //
1916                //  Pop out only spaces, so that the upcoming EOL check does not check the
1917                //  next line.
1918                //
1919                else if( ch == '\n' || ch == '\r' )
1920                {
1921                    pushBack(ch);
1922                }
1923            }
1924            else
1925            {
1926                //
1927                // Anything else stops.
1928                //
1929
1930                pushBack(ch);
1931
1932                try
1933                {
1934                    final Boolean isSpan = m_styleStack.pop();
1935
1936                    if( isSpan == null )
1937                    {
1938                        // Fail quietly
1939                    }
1940                    else if( isSpan.booleanValue() )
1941                    {
1942                        el = popElement( "span" );
1943                    }
1944                    else
1945                    {
1946                        el = popElement( "div" );
1947                    }
1948                }
1949                catch( final EmptyStackException e )
1950                {
1951                    log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
1952                    return m_currentElement;
1953                }
1954
1955                return el;
1956            }
1957
1958            //
1959            //  Check if there is an attempt to do something nasty
1960            //
1961
1962            try
1963            {
1964                style = StringEscapeUtils.unescapeHtml4(style);
1965                if( style != null && style.indexOf("javascript:") != -1 )
1966                {
1967                    log.debug("Attempt to output javascript within CSS:"+style);
1968                    final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1969                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
1970                }
1971            }
1972            catch( final NumberFormatException e )
1973            {
1974                //
1975                //  If there are unknown entities, we don't want the parser to stop.
1976                //
1977                final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1978                final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
1979                return addElement( makeError( msg ) );
1980            }
1981
1982            //
1983            //  Decide if we should open a div or a span?
1984            //
1985            final String eol = peekAheadLine();
1986
1987            if( !eol.trim().isEmpty() )
1988            {
1989                // There is stuff after the class
1990
1991                el = new Element("span");
1992
1993                m_styleStack.push( Boolean.TRUE );
1994            }
1995            else
1996            {
1997                startBlockLevel();
1998                el = new Element("div");
1999                m_styleStack.push( Boolean.FALSE );
2000            }
2001
2002            if( style != null ) el.setAttribute("style", style);
2003            if( clazz != null ) el.setAttribute("class", clazz);
2004            el = pushElement( el );
2005
2006            return el;
2007        }
2008
2009        pushBack(ch);
2010
2011        return el;
2012    }
2013
2014    private Element handleSlash( final boolean newLine )
2015        throws IOException
2016    {
2017        final int ch = nextToken();
2018
2019        pushBack(ch);
2020        if( ch == '%' && !m_styleStack.isEmpty() )
2021        {
2022            return handleDiv( newLine );
2023        }
2024
2025        return null;
2026    }
2027
2028    private Element handleBar( final boolean newLine )
2029        throws IOException
2030    {
2031        Element el = null;
2032
2033        if( !m_istable && !newLine )
2034        {
2035            return null;
2036        }
2037
2038        //
2039        //  If the bar is in the first column, we will either start
2040        //  a new table or continue the old one.
2041        //
2042
2043        if( newLine )
2044        {
2045            if( !m_istable )
2046            {
2047                startBlockLevel();
2048                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2049                m_istable = true;
2050                m_rowNum = 0;
2051            }
2052
2053            m_rowNum++;
2054            final Element tr = ( m_rowNum % 2 != 0 )
2055                       ? new Element("tr").setAttribute("class", "odd")
2056                       : new Element("tr");
2057            el = pushElement( tr );
2058        }
2059
2060        //
2061        //  Check out which table cell element to start;
2062        //  a header element (th) or a regular element (td).
2063        //
2064        final int ch = nextToken();
2065
2066        if( ch == '|' )
2067        {
2068            if( !newLine )
2069            {
2070                el = popElement("th");
2071                if( el == null ) popElement("td");
2072            }
2073            el = pushElement( new Element("th") );
2074        }
2075        else
2076        {
2077            if( !newLine )
2078            {
2079                el = popElement("td");
2080                if( el == null ) popElement("th");
2081            }
2082
2083            el = pushElement( new Element("td") );
2084
2085            pushBack( ch );
2086        }
2087
2088        return el;
2089    }
2090
2091    /**
2092     *  Generic escape of next character or entity.
2093     */
2094    private Element handleTilde()
2095        throws IOException
2096    {
2097        final int ch = nextToken();
2098
2099        if( ch == ' ' )
2100        {
2101            if( m_wysiwygEditorMode )
2102            {
2103                m_plainTextBuf.append( "~ " );
2104            }
2105            return m_currentElement;
2106        }
2107
2108        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2109            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2110            ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2111        {
2112            if( m_wysiwygEditorMode )
2113            {
2114                m_plainTextBuf.append( '~' );
2115            }
2116
2117            m_plainTextBuf.append( (char)ch );
2118            m_plainTextBuf.append(readWhile( ""+(char)ch ));
2119            return m_currentElement;
2120        }
2121
2122        // No escape.
2123        pushBack( ch );
2124
2125        return null;
2126    }
2127
2128    private void fillBuffer( final Element startElement )
2129        throws IOException
2130    {
2131        m_currentElement = startElement;
2132
2133        boolean quitReading = false;
2134        m_newLine = true;
2135        disableOutputEscaping();
2136
2137        while(!quitReading)
2138        {
2139            final int ch = nextToken();
2140
2141            if( ch == -1 ) break;
2142
2143            //
2144            //  Check if we're actually ending the preformatted mode.
2145            //  We still must do an entity transformation here.
2146            //
2147            if( m_isEscaping )
2148            {
2149                if( ch == '}' )
2150                {
2151                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2152                }
2153                else if( ch == -1 )
2154                {
2155                    quitReading = true;
2156                }
2157                else if( ch == '\r' )
2158                {
2159                    // DOS line feeds we ignore.
2160                }
2161                else if( ch == '<' )
2162                {
2163                    m_plainTextBuf.append( "&lt;" );
2164                }
2165                else if( ch == '>' )
2166                {
2167                    m_plainTextBuf.append( "&gt;" );
2168                }
2169                else if( ch == '&' )
2170                {
2171                    m_plainTextBuf.append( "&amp;" );
2172                }
2173                else if( ch == '~' )
2174                {
2175                    String braces = readWhile("}");
2176                    if( braces.length() >= 3 )
2177                    {
2178                        m_plainTextBuf.append("}}}");
2179
2180                        braces = braces.substring(3);
2181                    }
2182                    else
2183                    {
2184                        m_plainTextBuf.append( (char) ch );
2185                    }
2186
2187                    for( int i = braces.length()-1; i >= 0; i-- )
2188                    {
2189                        pushBack(braces.charAt(i));
2190                    }
2191                }
2192                else
2193                {
2194                    m_plainTextBuf.append( (char) ch );
2195                }
2196
2197                continue;
2198            }
2199
2200            //
2201            //  An empty line stops a list
2202            //
2203            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2204            {
2205                m_plainTextBuf.append(unwindGeneralList());
2206            }
2207
2208            if( m_newLine && ch != '|' && m_istable )
2209            {
2210                popElement("table");
2211                m_istable = false;
2212            }
2213
2214            int skip = IGNORE;
2215
2216            //
2217            //  Do the actual parsing and catch any errors.
2218            //
2219            try
2220            {
2221                skip = parseToken( ch );
2222            }
2223            catch( final IllegalDataException e )
2224            {
2225                log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2226
2227                makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2228            }
2229
2230            //
2231            //   The idea is as follows:  If the handler method returns
2232            //   an element (el != null), it is assumed that it has been
2233            //   added in the stack.  Otherwise the character is added
2234            //   as is to the plaintext buffer.
2235            //
2236            //   For the transition phase, if s != null, it also gets
2237            //   added in the plaintext buffer.
2238            //
2239
2240            switch( skip )
2241            {
2242                case ELEMENT:
2243                    m_newLine = false;
2244                    break;
2245
2246                case CHARACTER:
2247                    m_plainTextBuf.append( (char) ch );
2248                    m_newLine = false;
2249                    break;
2250
2251                case IGNORE:
2252                default:
2253                    break;
2254            }
2255        }
2256
2257        closeHeadings();
2258        popElement("domroot");
2259    }
2260
2261    private String cleanupSuspectData( final String s )
2262    {
2263        final StringBuilder sb = new StringBuilder( s.length() );
2264
2265        for( int i = 0; i < s.length(); i++ )
2266        {
2267            final char c = s.charAt(i);
2268
2269            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2270            else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() );
2271        }
2272
2273        return sb.toString();
2274    }
2275
    /** Result of {@link #parseToken(int)}: the token is a plain character. */
    protected static final int CHARACTER = 0;

    /** Result of {@link #parseToken(int)}: the token is a wikimarkup element. */
    protected static final int ELEMENT   = 1;

    /** Result of {@link #parseToken(int)}: the token is to be ignored. */
    protected static final int IGNORE    = 2;
2284
    /**
     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
     *  you think this was a wiki markup element, and IGNORE, if you think
     *  we should ignore this altogether.
     *  <p>
     *  This implementation returns IGNORE for carriage returns and line feeds.
     *  Every other token is passed to the handler for that character; ELEMENT is
     *  returned if the handler produced an element and CHARACTER otherwise.
     *  <p>
     *  To add your own MarkupParser, you can override this method, but it
     *  is recommended that you call super.parseToken() as well to gain advantage
     *  of JSPWiki's own markup.  You can call it at the start of your own
     *  parseToken() or end - it does not matter.
     *
     * @param ch The character under investigation
     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
     * @throws IOException If parsing fails.
     */
    protected int parseToken( final int ch )
        throws IOException
    {
        Element el = null;

        //
        //  Now, check the incoming token.
        //
        switch( ch )
        {
          case '\r':
            // DOS linefeeds we forget
            return IGNORE;

          case '\n':
            //
            //  Close things like headings, etc.
            //

            // FIXME: This is not really very fast

            closeHeadings();

            popElement("dl"); // Close definition lists.
            if( m_istable )
            {
                popElement("tr");
            }

            m_isdefinition = false;

            // m_newLine still being set means that the line just ended had no content.
            if( m_newLine )
            {
                // Paragraph change.
                startBlockLevel();

                //
                //  Figure out which elements cannot be enclosed inside
                //  a <p></p> pair according to XHTML rules.
                //
                final String nextLine = peekAheadLine();
                if( nextLine.isEmpty() ||
                    (!nextLine.isEmpty() &&
                     !nextLine.startsWith("{{{") &&
                     !nextLine.startsWith("----") &&
                     !nextLine.startsWith("%%") &&
                     "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
                {
                    pushElement( new Element("p") );
                    m_isOpenParagraph = true;

                    // Re-open italic/bold that was flagged for restart in the new paragraph.
                    if( m_restartitalic )
                    {
                        pushElement( new Element("i") );
                        m_isitalic = true;
                        m_restartitalic = false;
                    }
                    if( m_restartbold )
                    {
                        pushElement( new Element("b") );
                        m_isbold = true;
                        m_restartbold = false;
                    }
                }
            }
            else
            {
                // An ordinary line end: keep the line feed and note that a new line starts.
                m_plainTextBuf.append("\n");
                m_newLine = true;
            }
            return IGNORE;


          case '\\':
            el = handleBackslash();
            break;

          case '_':
            el = handleUnderscore();
            break;

          case '\'':
            el = handleApostrophe();
            break;

          case '{':
            el = handleOpenbrace( m_newLine );
            break;

          case '}':
            el = handleClosebrace();
            break;

          // The following three are only markup at the start of a line.
          case '-':
            if( m_newLine )
                el = handleDash();

            break;

          case '!':
            if( m_newLine )
            {
                el = handleHeading();
            }
            break;

          case ';':
            if( m_newLine )
            {
                el = handleDefinitionList();
            }
            break;

          case ':':
            // Inside a definition, the colon ends the term and starts the description.
            if( m_isdefinition )
            {
                popElement("dt");
                el = pushElement( new Element("dd") );
                m_isdefinition = false;
            }
            break;

          case '[':
            el = handleOpenbracket();
            break;

          // List bullets are only markup at the start of a line; the bullet is pushed
          // back before the list handler is called.
          case '*':
            if( m_newLine )
            {
                pushBack('*');
                el = handleGeneralList();
            }
            break;

          case '#':
            if( m_newLine )
            {
                pushBack('#');
                el = handleGeneralList();
            }
            break;

          case '|':
            el = handleBar( m_newLine );
            break;

          case '~':
            el = handleTilde();
            break;

          case '%':
            el = handleDiv( m_newLine );
            break;

          case '/':
            el = handleSlash( m_newLine );
            break;

          default:
            break;
        }

        // No element from the handler means the token is treated as a plain character.
        return el != null ? ELEMENT : CHARACTER;
    }
2463
2464    private void closeHeadings()
2465    {
2466        if( m_lastHeading != null && !m_wysiwygEditorMode )
2467        {
2468            // Add the hash anchor element at the end of the heading
2469            addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2470            m_lastHeading = null;
2471        }
2472        popElement("h2");
2473        popElement("h3");
2474        popElement("h4");
2475    }
2476
2477    /**
2478     *  Parses the entire document from the Reader given in the constructor or
2479     *  set by {@link #setInputReader(Reader)}.
2480     *
2481     *  @return A WikiDocument, ready to be passed to the renderer.
2482     *  @throws IOException If parsing cannot be accomplished.
2483     */
2484    @Override
2485    public WikiDocument parse()
2486        throws IOException
2487    {
2488        final WikiDocument d = new WikiDocument( m_context.getPage() );
2489        d.setContext( m_context );
2490
2491        final Element rootElement = new Element("domroot");
2492
2493        d.setRootElement( rootElement );
2494
2495        fillBuffer( rootElement );
2496
2497        paragraphify(rootElement);
2498
2499        return d;
2500    }
2501
2502    /**
2503     *  Checks out that the first paragraph is correctly installed.
2504     *
2505     *  @param rootElement
2506     */
2507    private void paragraphify( final Element rootElement)
2508    {
2509        //
2510        //  Add the paragraph tag to the first paragraph
2511        //
2512        final List< Content > kids = rootElement.getContent();
2513
2514        if( rootElement.getChild("p") != null )
2515        {
2516            final ArrayList<Content> ls = new ArrayList<>();
2517            int idxOfFirstContent = 0;
2518            int count = 0;
2519
2520            for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ )
2521            {
2522                final Content c = i.next();
2523                if( c instanceof Element )
2524                {
2525                    final String name = ( ( Element )c ).getName();
2526                    if( isBlockLevel( name ) ) break;
2527                }
2528
2529                if( !(c instanceof ProcessingInstruction) )
2530                {
2531                    ls.add( c );
2532                    if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2533                }
2534            }
2535
2536            //
2537            //  If there were any elements, then add a new <p> (unless it would
2538            //  be an empty one)
2539            //
2540            if( ls.size() > 0 )
2541            {
2542                final Element newel = new Element("p");
2543
2544                for( final Iterator< Content > i = ls.iterator(); i.hasNext(); )
2545                {
2546                    final Content c = i.next();
2547
2548                    c.detach();
2549                    newel.addContent(c);
2550                }
2551
2552                //
2553                // Make sure there are no empty <p/> tags added.
2554                //
2555                if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() )
2556                    rootElement.addContent(idxOfFirstContent, newel);
2557            }
2558        }
2559    }
2560
2561}