Source code

001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import org.apache.commons.lang3.StringUtils;
022import org.apache.commons.text.StringEscapeUtils;
023import org.apache.log4j.Logger;
024import org.apache.oro.text.regex.MalformedPatternException;
025import org.apache.oro.text.regex.MatchResult;
026import org.apache.oro.text.regex.Pattern;
027import org.apache.oro.text.regex.PatternCompiler;
028import org.apache.oro.text.regex.PatternMatcher;
029import org.apache.oro.text.regex.Perl5Compiler;
030import org.apache.oro.text.regex.Perl5Matcher;
031import org.apache.wiki.InternalWikiException;
032import org.apache.wiki.StringTransmutator;
033import org.apache.wiki.api.core.Acl;
034import org.apache.wiki.api.core.Context;
035import org.apache.wiki.api.core.ContextEnum;
036import org.apache.wiki.api.core.Page;
037import org.apache.wiki.api.exceptions.PluginException;
038import org.apache.wiki.api.plugin.Plugin;
039import org.apache.wiki.api.spi.Wiki;
040import org.apache.wiki.attachment.AttachmentManager;
041import org.apache.wiki.auth.AuthorizationManager;
042import org.apache.wiki.auth.UserManager;
043import org.apache.wiki.auth.WikiSecurityException;
044import org.apache.wiki.auth.acl.AclManager;
045import org.apache.wiki.i18n.InternationalizationManager;
046import org.apache.wiki.preferences.Preferences;
047import org.apache.wiki.util.TextUtil;
048import org.apache.wiki.util.XmlUtil;
049import org.apache.wiki.variables.VariableManager;
050import org.jdom2.Attribute;
051import org.jdom2.Content;
052import org.jdom2.Element;
053import org.jdom2.IllegalDataException;
054import org.jdom2.ProcessingInstruction;
055import org.jdom2.Verifier;
056
057import javax.xml.transform.Result;
058import java.io.IOException;
059import java.io.Reader;
060import java.io.StringReader;
061import java.text.MessageFormat;
062import java.util.ArrayList;
063import java.util.Arrays;
064import java.util.Collection;
065import java.util.EmptyStackException;
066import java.util.HashMap;
067import java.util.Iterator;
068import java.util.List;
069import java.util.Map;
070import java.util.Properties;
071import java.util.ResourceBundle;
072import java.util.Stack;
073
074/**
075 *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
076 *  heart and soul of JSPWiki : make sure you test properly anything that is added,
077 *  or else it breaks down horribly.
078 *
079 *  @since  2.4
080 */
081public class JSPWikiMarkupParser extends MarkupParser {
082
    //
    //  Link types understood by makeLink().  createAnchor() additionally uses the
    //  value as an index into CLASS_TYPES to pick the CSS class of the anchor.
    //
    //    READ          - link to an existing page (page-view URL)
    //    EDIT          - link to the edit URL of a page, with a "create" tooltip
    //    EMPTY         - no usable target; the text is rendered underlined, without a link
    //    LOCAL         - named anchor that is the target of a footnote reference
    //    LOCALREF      - link pointing at a LOCAL footnote anchor
    //    IMAGE         - inline image
    //    EXTERNAL      - link to an external URL
    //    INTERWIKI     - link to another wiki
    //    IMAGELINK     - inline image wrapped in a link whose target is the link text
    //    IMAGEWIKILINK - inline image wrapped in a link to the wiki page named by the link text
    //    ATTACHMENT    - link to an attachment, optionally followed by an info icon
    //
    protected static final int              READ          = 0;
    protected static final int              EDIT          = 1;
    protected static final int              EMPTY         = 2;  // Empty message
    protected static final int              LOCAL         = 3;
    protected static final int              LOCALREF      = 4;
    protected static final int              IMAGE         = 5;
    protected static final int              EXTERNAL      = 6;
    protected static final int              INTERWIKI     = 7;
    protected static final int              IMAGELINK     = 8;
    protected static final int              IMAGEWIKILINK = 9;
    protected static final int              ATTACHMENT    = 10;

    private static final Logger log = Logger.getLogger( JSPWikiMarkupParser.class );

    // Parser state flags.  Apart from m_isEscaping (which switches off CamelCase/URI
    // auto-linking in flushPlainText()) they are not used in this part of the file;
    // presumably each tracks whether the corresponding markup construct is currently
    // open - TODO confirm against the parsing methods further down.
    private boolean        m_isbold       = false;
    private boolean        m_isitalic     = false;
    private boolean        m_istable      = false;
    private boolean        m_isPre        = false;
    private boolean        m_isEscaping   = false;
    private boolean        m_isdefinition = false;
    private boolean        m_isPreBlock   = false;

    /** Contains style information, in multiple forms. */
    private Stack<Boolean> m_styleStack   = new Stack<>();

     // general list handling
    private int            m_genlistlevel = 0;
    private StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
    private boolean        m_allowPHPWikiStyleLists = true;

    private boolean        m_isOpenParagraph = false;

    /** Parser for extended link functionality. */
    private LinkParser     m_linkParser = new LinkParser();

    /** Keeps track of any plain text that gets put in the Text nodes */
    private StringBuilder  m_plainTextBuf = new StringBuilder(20);

    /** The element new content is currently appended to; see pushElement() and popElement(). */
    private Element        m_currentElement;

    /** Keep track of duplicate header names.  */
    private Map<String, Integer>   m_titleSectionCounter = new HashMap<>();

    /** If true, then considers CamelCase links as well. */
    private boolean                m_camelCaseLinks      = false;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean                m_wysiwygEditorMode     = false;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean                m_plainUris           = false;

    /** If true, all outward links use a small link image. */
    private boolean                m_useOutlinkImage     = true;

    /** If true, makeLink() appends a small "info" icon link after every attachment link. */
    private boolean                m_useAttachmentImage  = true;

    /** If true, allows raw HTML. */
    private boolean                m_allowHTML           = false;

    /** If true, makeLink() marks EXTERNAL links with rel="nofollow". */
    private boolean                m_useRelNofollow      = false;

    private PatternCompiler        m_compiler = new Perl5Compiler();

    //
    //  Pattern used by flushPlainText() to find CamelCase words and bare URIs:
    //    group 1 - the prefix (start of input or a run of non-alphanumeric characters)
    //    group 2 - the matched WikiWord, or the complete URI
    //    group 3 - the URI protocol (http://, https:// or mailto:), null for a WikiWord
    //    group 4 - the remainder of the URI after the protocol
    //
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    private PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();
    private Pattern                m_camelCasePattern;

    // Not used in this part of the file; presumably the current table row number - TODO confirm.
    private int                    m_rowNum              = 1;

    private Heading                m_lastHeading         = null;

    /** Name of the engine attribute under which initialize() caches the compiled WIKIWORD_REGEX. */
    private static final String CAMELCASE_PATTERN     = "JSPWikiMarkupParser.camelCasePattern";
158
159    /**
160     *  Creates a markup parser.
161     *
162     *  @param context The WikiContext which controls the parsing
163     *  @param in Where the data is read from.
164     */
165    public JSPWikiMarkupParser( final Context context, final Reader in )
166    {
167        super( context, in );
168        initialize();
169    }
170
171    // FIXME: parsers should be pooled for better performance.
172    private void initialize() {
173        initInlineImagePatterns();
174
175        m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN );
176        if( m_camelCasePattern == null ) {
177            try {
178                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
179            } catch( final MalformedPatternException e ) {
180                log.fatal("Internal error: Someone put in a faulty pattern.",e);
181                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
182            }
183            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
184        }
185
186        //  Set the properties.
187        final Properties props = m_engine.getWikiProperties();
188        final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
189
190        if( cclinks != null ) {
191            m_camelCaseLinks = TextUtil.isPositive( cclinks );
192        } else {
193            m_camelCaseLinks  = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks );
194        }
195
196        final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE );
197        if( wysiwygVariable != null ) {
198            m_wysiwygEditorMode = wysiwygVariable;
199        }
200
201        m_plainUris          = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
202        m_useOutlinkImage    = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
203        m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
204        m_allowHTML          = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
205        m_useRelNofollow     = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
206
207        if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) {
208            disableAccessRules();
209        }
210
211        m_context.getPage().setHasMetadata();
212    }
213
214    /**
215     *  Calls a transmutator chain.
216     *
217     *  @param list Chain to call
218     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
219     *  @return The result of the mutation.
220     */
221    protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) {
222        if( list == null || list.size() == 0 ) {
223            return text;
224        }
225
226        for( final StringTransmutator m : list ) {
227            text = m.mutate( m_context, text );
228        }
229
230        return text;
231    }
232
233    /**
234     * Calls the heading listeners.
235     *
236     * @param param A Heading object.
237     */
238    protected void callHeadingListenerChain( final Heading param ) {
239        final List< HeadingListener > list = m_headingListenerChain;
240        for( final HeadingListener h : list ) {
241            h.headingAdded( m_context, param );
242        }
243    }
244
245    /**
246     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
247     *  if you really know what you are doing.
248     *
249     *  @param type One of the types above
250     *  @param link URL to which to link to
251     *  @param text Link text
252     *  @param section If a particular section identifier is required.
253     *  @return An A element.
254     *  @since 2.4.78
255     */
256    protected Element createAnchor( final int type, final String link, String text, String section)
257    {
258        text = escapeHTMLEntities( text );
259        section = escapeHTMLEntities( section );
260        final Element el = new Element("a");
261        el.setAttribute("class",CLASS_TYPES[type]);
262        el.setAttribute("href",link+section);
263        el.addContent(text);
264        return el;
265    }
266
267    private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes )
268    {
269        Element el = null;
270
271        if( text == null ) text = link;
272
273        text = callMutatorChain( m_linkMutators, text );
274
275        section = (section != null) ? ("#"+section) : "";
276
277        // Make sure we make a link name that can be accepted
278        // as a valid URL.
279
280        if( link.length() == 0 )
281        {
282            type = EMPTY;
283        }
284        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
285
286        switch(type)
287        {
288            case READ:
289                el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section );
290                break;
291
292            case EDIT:
293                el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" );
294                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
295
296                break;
297
298            case EMPTY:
299                el = new Element("u").addContent(text);
300                break;
301
302                //
303                //  These two are for local references - footnotes and
304                //  references to footnotes.
305                //  We embed the page name (or whatever WikiContext gives us)
306                //  to make sure the links are unique across Wiki.
307                //
308            case LOCALREF:
309                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
310                break;
311
312            case LOCAL:
313                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
314                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
315                el.addContent("["+text+"]");
316                break;
317
318                //
319                //  With the image, external and interwiki types we need to
320                //  make sure nobody can put in Javascript or something else
321                //  annoying into the links themselves.  We do this by preventing
322                //  a haxor from stopping the link name short with quotes in
323                //  fillBuffer().
324                //
325            case IMAGE:
326                el = new Element("img").setAttribute("class","inline");
327                el.setAttribute("src",link);
328                el.setAttribute("alt",text);
329                break;
330
331            case IMAGELINK:
332                el = new Element("img").setAttribute("class","inline");
333                el.setAttribute("src",link);
334                el.setAttribute("alt",text);
335                el = createAnchor(IMAGELINK,text,"","").addContent(el);
336                break;
337
338            case IMAGEWIKILINK:
339                final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text );
340                el = new Element("img").setAttribute("class","inline");
341                el.setAttribute("src",link);
342                el.setAttribute("alt",text);
343                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
344                break;
345
346            case EXTERNAL:
347                el = createAnchor( EXTERNAL, link, text, section );
348                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
349                break;
350
351            case INTERWIKI:
352                el = createAnchor( INTERWIKI, link, text, section );
353                break;
354
355            case ATTACHMENT:
356                final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link );
357                final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link );
358                final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" );
359                el = createAnchor( ATTACHMENT, attlink, text, "" );
360
361                if(  m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) {
362                    el.setAttribute("download", "");
363                }
364
365                pushElement(el);
366                popElement(el.getName());
367
368                if( m_useAttachmentImage )
369                {
370                    el = new Element("img").setAttribute("src",imglink);
371                    el.setAttribute("border","0");
372                    el.setAttribute("alt","(info)");
373
374                    el = new Element("a").setAttribute("href",infolink).addContent(el);
375                    el.setAttribute("class","infolink");
376                }
377                else
378                {
379                    el = null;
380                }
381                break;
382
383            default:
384                break;
385        }
386
387        if( el != null && attributes != null )
388        {
389            while( attributes.hasNext() )
390            {
391                final Attribute attr = attributes.next();
392                if( attr != null )
393                {
394                    el.setAttribute(attr);
395                }
396            }
397        }
398
399        if( el != null )
400        {
401            flushPlainText();
402            m_currentElement.addContent( el );
403        }
404        return el;
405    }
406
407    /**
408     *  These are all of the HTML 4.01 block-level elements.
409     */
410    private static final String[] BLOCK_ELEMENTS = {
411        "address", "blockquote", "div", "dl", "fieldset", "form",
412        "h1", "h2", "h3", "h4", "h5", "h6",
413        "hr", "noscript", "ol", "p", "pre", "table", "ul"
414    };
415
416    private static boolean isBlockLevel( final String name )
417    {
418        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
419    }
420
421    /**
422     *  This method peeks ahead in the stream until EOL and returns the result.
423     *  It will keep the buffers untouched.
424     *
425     *  @return The string from the current position to the end of line.
426     */
427
428    // FIXME: Always returns an empty line, even if the stream is full.
429    private String peekAheadLine()
430        throws IOException
431    {
432        final String s = readUntilEOL().toString();
433
434        if( s.length() > PUSHBACK_BUFFER_SIZE )
435        {
436            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
437            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
438        }
439        else
440        {
441            try
442            {
443                pushBack( s );
444            }
445            catch( final IOException e )
446            {
447                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
448            }
449        }
450        return s;
451    }
452
    /**
     *  Moves the plain text collected in m_plainTextBuf into the current element
     *  and empties the buffer.  Unless raw HTML is allowed, the text is
     *  entity-escaped first.  When CamelCase links are enabled (and the parser is
     *  not escaping), WikiWords and bare URIs found in the text are turned into
     *  links on the way.
     *
     *  @return The number of characters that were in the buffer before it was
     *          flushed (counted before escaping); 0 if there was nothing to flush.
     */
    private int flushPlainText()
    {
        final int numChars = m_plainTextBuf.length();

        if( numChars > 0 )
        {
            String buf;

            if( !m_allowHTML )
            {
                buf = escapeHTMLEntities(m_plainTextBuf.toString());
            }
            else
            {
                buf = m_plainTextBuf.toString();
            }
            //
            //  We must first empty the buffer because the side effect of
            //  calling makeCamelCaseLink() is to call this routine.
            //

            m_plainTextBuf = new StringBuilder(20);

            try
            {
                //
                //  This is the heaviest part of parsing, and therefore we can
                //  do some optimization here.
                //
                //  1) Only when the length of the buffer is big enough, we try to do the match
                //

                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
                {
                    // System.out.println("Buffer="+buf);

                    // Each iteration consumes the text up to and including one match;
                    // buf always holds the part that has not been processed yet.
                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
                    {
                        final MatchResult result = m_camelCaseMatcher.getMatch();

                        // Text in front of the match is emitted as is.
                        final String firstPart = buf.substring(0,result.beginOffset(0));
                        String prefix = result.group(1);

                        if( prefix == null ) prefix = "";

                        // Group 2 is the matched WikiWord or the complete URI, group 3 the
                        // URI protocol (null for a WikiWord), group 4 the rest of the URI.
                        final String camelCase = result.group(2);
                        final String protocol  = result.group(3);
                        // Only meaningful when a protocol was matched.
                        String uri       = protocol+result.group(4);
                        buf              = buf.substring(result.endOffset(0));

                        m_currentElement.addContent( firstPart );

                        //
                        //  Check if the user does not wish to do URL or WikiWord expansion
                        //
                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
                        {
                            if( prefix.endsWith("~") )
                            {
                                // The tilde itself is only kept for the wysiwyg editor.
                                if( m_wysiwygEditorMode )
                                {
                                    m_currentElement.addContent( "~" );
                                }
                                prefix = prefix.substring(0,prefix.length()-1);
                            }
                            if( camelCase != null )
                            {
                                m_currentElement.addContent( prefix+camelCase );
                            }
                            else if( protocol != null )
                            {
                                m_currentElement.addContent( prefix+uri );
                            }
                            // Carry on with the text that follows the match.
                            continue;
                        }

                        //
                        //  Fine, then let's check what kind of a link this was
                        //  and emit the proper elements
                        //
                        if( protocol != null )
                        {
                            // A trailing '.' or ',' is not part of the link: it is cut
                            // off and put back in front of the remaining text.
                            final char c = uri.charAt(uri.length()-1);
                            if( c == '.' || c == ',' )
                            {
                                uri = uri.substring(0,uri.length()-1);
                                buf = c + buf;
                            }
                            // System.out.println("URI match "+uri);
                            m_currentElement.addContent( prefix );
                            makeDirectURILink( uri );
                        }
                        else
                        {
                            // System.out.println("Matched: '"+camelCase+"'");
                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
                            // System.out.println("prefix="+prefix);
                            m_currentElement.addContent( prefix );

                            makeCamelCaseLink( camelCase );
                        }
                    }

                    // Whatever follows the last match.
                    m_currentElement.addContent( buf );
                }
                else
                {
                    //
                    //  No camelcase asked for, just add the elements
                    //
                    m_currentElement.addContent( buf );
                }
            }
            catch( final IllegalDataException e )
            {
                //
                // Sometimes it's possible that illegal XML chars is added to the data.
                // Here we make sure it does not stop parsing.
                //
                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
            }
        }

        return numChars;
    }
578
579    /**
580     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
581     *  entities that are already escaped).
582     *
583     *  @param buf
584     *  @return An escaped string.
585     */
586    private String escapeHTMLEntities( final String buf)
587    {
588        final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
589
590        for( int i = 0; i < buf.length(); i++ )
591        {
592            final char ch = buf.charAt(i);
593
594            if( ch == '<' )
595            {
596                tmpBuf.append("&lt;");
597            }
598            else if( ch == '>' )
599            {
600                tmpBuf.append("&gt;");
601            }
602            else if( ch == '\"' )
603            {
604                tmpBuf.append("&quot;");
605            }
606            else if( ch == '&' )
607            {
608                //
609                //  If the following is an XML entity reference (&#.*;) we'll
610                //  leave it as it is; otherwise we'll replace it with an &amp;
611                //
612
613                boolean isEntity = false;
614                final StringBuilder entityBuf = new StringBuilder();
615
616                if( i < buf.length() -1 )
617                {
618                    for( int j = i; j < buf.length(); j++ )
619                    {
620                        final char ch2 = buf.charAt(j);
621
622                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
623                        {
624                            entityBuf.append(ch2);
625
626                            if( ch2 == ';' )
627                            {
628                                isEntity = true;
629                                break;
630                            }
631                        }
632                        else
633                        {
634                            break;
635                        }
636                    }
637                }
638
639                if( isEntity )
640                {
641                    tmpBuf.append( entityBuf );
642                    i = i + entityBuf.length() - 1;
643                }
644                else
645                {
646                    tmpBuf.append("&amp;");
647                }
648
649            }
650            else
651            {
652                tmpBuf.append( ch );
653            }
654        }
655
656        return tmpBuf.toString();
657    }
658
659    private Element pushElement( final Element e )
660    {
661        flushPlainText();
662        m_currentElement.addContent( e );
663        m_currentElement = e;
664
665        return e;
666    }
667
668    private Element addElement( final Content e )
669    {
670        if( e != null )
671        {
672            flushPlainText();
673            m_currentElement.addContent( e );
674        }
675        return m_currentElement;
676    }
677
678    /**
679     *  All elements that can be empty by the HTML DTD.
680     */
681    //  Keep sorted.
682    private static final String[] EMPTY_ELEMENTS = {
683        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
684    };
685
686    /**
687     *  Goes through the current element stack and pops all elements until this
688     *  element is found - this essentially "closes" and element.
689     *
690     *  @param s
691     *  @return The new current element, or null, if there was no such element in the entire stack.
692     */
693    private Element popElement( final String s )
694    {
695        final int flushedBytes = flushPlainText();
696
697        Element currEl = m_currentElement;
698
699        while( currEl.getParentElement() != null )
700        {
701            if( currEl.getName().equals(s) && !currEl.isRootElement() )
702            {
703                m_currentElement = currEl.getParentElement();
704
705                //
706                //  Check if it's okay for this element to be empty.  Then we will
707                //  trick the JDOM generator into not generating an empty element,
708                //  by putting an empty string between the tags.  Yes, it's a kludge
709                //  but what'cha gonna do about it. :-)
710                //
711
712                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
713                {
714                    currEl.addContent("");
715                }
716
717                return m_currentElement;
718            }
719
720            currEl = currEl.getParentElement();
721        }
722
723        return null;
724    }
725
726
727    /**
728     *  Reads the stream until it meets one of the specified
729     *  ending characters, or stream end.  The ending character will be left
730     *  in the stream.
731     */
732    private String readUntil( final String endChars )
733        throws IOException
734    {
735        final StringBuilder sb = new StringBuilder( 80 );
736        int ch = nextToken();
737
738        while( ch != -1 )
739        {
740            if( ch == '\\' )
741            {
742                ch = nextToken();
743                if( ch == -1 )
744                {
745                    break;
746                }
747            }
748            else
749            {
750                if( endChars.indexOf((char)ch) != -1 )
751                {
752                    pushBack( ch );
753                    break;
754                }
755            }
756            sb.append( (char) ch );
757            ch = nextToken();
758        }
759
760        return sb.toString();
761    }
762
763    /**
764     *  Reads the stream while the characters that have been specified are
765     *  in the stream, returning then the result as a String.
766     */
767    private String readWhile( final String endChars )
768        throws IOException
769    {
770        final StringBuilder sb = new StringBuilder( 80 );
771        int ch = nextToken();
772
773        while( ch != -1 )
774        {
775            if( endChars.indexOf((char)ch) == -1 )
776            {
777                pushBack( ch );
778                break;
779            }
780
781            sb.append( (char) ch );
782            ch = nextToken();
783        }
784
785        return sb.toString();
786    }
787
788    private JSPWikiMarkupParser m_cleanTranslator;
789
790    /**
791     *  Does a lazy init.  Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before
792     *  the TranslatorReader it is contained by is up.
793     */
794    private JSPWikiMarkupParser getCleanTranslator() {
795        if( m_cleanTranslator == null ) {
796            final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() );
797            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
798            m_cleanTranslator.m_allowHTML = true;
799        }
800
801        return m_cleanTranslator;
802    }
803    /**
804     *  Modifies the "hd" parameter to contain proper values.  Because
805     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
806     *  % after url encoding with '_'.
807     *  <p>
808     *  Counts also duplicate headings (= headings with similar name), and
809     *  attaches a counter.
810     */
811    private String makeHeadingAnchor( final String baseName, String title, final Heading hd ) {
812        hd.m_titleText = title;
813        title = MarkupParser.wikifyLink( title );
814        hd.m_titleSection = m_engine.encodeName(title);
815
816        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) {
817            final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1;
818            m_titleSectionCounter.put( hd.m_titleSection, count );
819            hd.m_titleSection += "-" + count;
820        } else {
821            m_titleSectionCounter.put( hd.m_titleSection, 1 );
822        }
823
824        hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection;
825        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
826        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
827
828        return hd.m_titleAnchor;
829    }
830
831    private String makeSectionTitle( String title ) {
832        title = title.trim();
833        try {
834            final JSPWikiMarkupParser dtr = getCleanTranslator();
835            dtr.setInputReader( new StringReader( title ) );
836            final WikiDocument doc = dtr.parse();
837            doc.setContext( m_context );
838
839            return XmlUtil.extractTextFromDocument( doc );
840        } catch( final IOException e ) {
841            log.fatal("Title parsing not working", e );
842            throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e );
843        }
844    }
845
846    /**
847     *  Returns XHTML for the heading.
848     *
849     *  @param level The level of the heading.  @see Heading
850     *  @param title the title for the heading
851     *  @param hd a List to which heading should be added
852     *  @return An Element containing the heading
853     */
854    public Element makeHeading( final int level, final String title, final Heading hd ) {
855        final Element el;
856        final String pageName = m_context.getPage().getName();
857        final String outTitle = makeSectionTitle( title );
858        hd.m_level = level;
859
860        switch( level ) {
861          case Heading.HEADING_SMALL:
862            el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
863            break;
864
865          case Heading.HEADING_MEDIUM:
866            el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
867            break;
868
869          case Heading.HEADING_LARGE:
870            el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
871            break;
872
873          default:
874            throw new InternalWikiException( "Illegal heading type " + level );
875        }
876
877        return el;
878    }
879
880    /**
881     *  When given a link to a WikiName, we just return
882     *  a proper HTML link for it.  The local link mutator
883     *  chain is also called.
884     */
885    private Element makeCamelCaseLink( final String wikiname )
886    {
887        final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
888
889        callMutatorChain( m_localLinkMutatorChain, wikiname );
890
891        if( matchedLink != null ) {
892            makeLink( READ, matchedLink, wikiname, null, null );
893        } else {
894            makeLink( EDIT, wikiname, wikiname, null, null );
895        }
896
897        return m_currentElement;
898    }
899
900    /** Holds the image URL for the duration of this parser */
901    private String m_outlinkImageURL = null;
902
903    /**
904     *  Returns an element for the external link image (out.png).  However,
905     *  this method caches the URL for the lifetime of this MarkupParser,
906     *  because it's commonly used, and we'll end up with possibly hundreds
907     *  our thousands of references to it...  It's a lot faster, too.
908     *
909     *  @return  An element containing the HTML for the outlink image.
910     */
911    private Element outlinkImage()
912    {
913        Element el = null;
914
915        if( m_useOutlinkImage )
916        {
917            if( m_outlinkImageURL == null )
918            {
919                m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE );
920            }
921
922            el = new Element( "img" ).setAttribute( "class", OUTLINK );
923            el.setAttribute( "src", m_outlinkImageURL );
924            el.setAttribute( "alt","" );
925        }
926
927        return el;
928    }
929
930    /**
931     *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
932     *  because of the way that flushPlainText() works, it already encodes
933     *  all of the XML entities.  But so does WikiContext.getURL(), so we
934     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
935     *  <p>
936     *  What a crappy problem.
937     *
938     * @param url
939     * @return An anchor Element containing the link.
940     */
941    private Element makeDirectURILink( String url ) {
942        final Element result;
943        String last = null;
944
945        if( url.endsWith( "," ) || url.endsWith( "." ) ) {
946            last = url.substring( url.length() - 1 );
947            url = url.substring( 0, url.length() - 1 );
948        }
949
950        callMutatorChain( m_externalLinkMutatorChain, url );
951
952        if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) {
953            result = handleImageLink( StringUtils.replace( url, "&amp;", "&" ), url, false );
954        } else {
955            result = makeLink( EXTERNAL, StringUtils.replace( url, "&amp;", "&" ), url, null, null );
956            addElement( outlinkImage() );
957        }
958
959        if( last != null ) {
960            m_plainTextBuf.append( last );
961        }
962
963        return result;
964    }
965
966    /**
967     *  Image links are handled differently:
968     *  1. If the text is a WikiName of an existing page,
969     *     it gets linked.
970     *  2. If the text is an external link, then it is inlined.
971     *  3. Otherwise it becomes an ALT text.
972     *
973     *  @param reallink The link to the image.
974     *  @param link     Link text portion, may be a link to somewhere else.
975     *  @param hasLinkText If true, then the defined link had a link text available.
976     *                  This means that the link text may be a link to a wiki page,
977     *                  or an external resource.
978     */
979
980    // FIXME: isExternalLink() is called twice.
981    private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText )
982    {
983        final String possiblePage = MarkupParser.cleanLink( link );
984
985        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText )
986        {
987            return makeLink( IMAGELINK, reallink, link, null, null );
988        }
989        else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText )
990        {
991            // System.out.println("Orig="+link+", Matched: "+matchedLink);
992            callMutatorChain( m_localLinkMutatorChain, possiblePage );
993
994            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
995        }
996        else
997        {
998            return makeLink( IMAGE, reallink, link, null, null );
999        }
1000    }
1001
1002    private Element handleAccessRule( String ruleLine ) {
1003        if( m_wysiwygEditorMode ) {
1004            m_currentElement.addContent( "[" + ruleLine + "]" );
1005        }
1006
1007        if( !m_parseAccessRules ) {
1008            return m_currentElement;
1009        }
1010        final Page page = m_context.getRealPage();
1011        // UserDatabase db = m_context.getEngine().getUserDatabase();
1012
1013        if( ruleLine.startsWith( "{" ) ) {
1014            ruleLine = ruleLine.substring( 1 );
1015        }
1016
1017        if( ruleLine.endsWith( "}" ) ) {
1018            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1019        }
1020
1021        if( log.isDebugEnabled() ) {
1022            log.debug("page="+page.getName()+", ACL = "+ruleLine);
1023        }
1024
1025        try {
1026            final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine );
1027            page.setAcl( acl );
1028
1029            if( log.isDebugEnabled() ) {
1030                log.debug( acl.toString() );
1031            }
1032        } catch( final WikiSecurityException wse ) {
1033            return makeError( wse.getMessage() );
1034        }
1035
1036        return m_currentElement;
1037    }
1038
1039    /**
1040     *  Handles metadata setting [{SET foo=bar}]
1041     */
1042    private Element handleMetadata( final String link ) {
1043        if( m_wysiwygEditorMode ) {
1044            m_currentElement.addContent( "[" + link + "]" );
1045        }
1046
1047        try {
1048            final String args = link.substring( link.indexOf(' '), link.length()-1 );
1049            final String name = args.substring( 0, args.indexOf('=') ).trim();
1050            String val  = args.substring( args.indexOf('=')+1 ).trim();
1051
1052            if( val.startsWith("'") ) {
1053                val = val.substring( 1 );
1054            }
1055            if( val.endsWith("'") ) {
1056                val = val.substring( 0, val.length()-1 );
1057            }
1058
1059            // log.debug("SET name='"+name+"', value='"+val+"'.");
1060
1061            if( name.length() > 0 && val.length() > 0 ) {
1062                val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val );
1063                m_context.getPage().setAttribute( name, val );
1064            }
1065        } catch( final Exception e ) {
1066            final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1067            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1068        }
1069
1070        return m_currentElement;
1071    }
1072
1073    /**
1074     *  Emits a processing instruction that will disable markup escaping. This is
1075     *  very useful if you want to emit HTML directly into the stream.
1076     *
1077     */
1078    private void disableOutputEscaping() {
1079        addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) );
1080    }
1081
1082    /**
1083     *  Gobbles up all hyperlinks that are encased in square brackets.
1084     */
1085    private Element handleHyperlinks( String linktext, final int pos ) {
1086        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1087        final StringBuilder sb = new StringBuilder( linktext.length() + 80 );
1088
1089        if( m_linkParsingOperations.isAccessRule( linktext ) ) {
1090            return handleAccessRule( linktext );
1091        }
1092
1093        if( m_linkParsingOperations.isMetadata( linktext ) ) {
1094            return handleMetadata( linktext );
1095        }
1096
1097        if( m_linkParsingOperations.isPluginLink( linktext ) ) {
1098            try {
1099                final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1100
1101                // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not.
1102                if( pluginContent != null ) {
1103                    addElement( pluginContent );
1104                    pluginContent.executeParse( m_context );
1105                }
1106            } catch( final PluginException e ) {
1107                log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
1108                //log.info( "Root cause:",e.getRootThrowable() );
1109                if( !m_wysiwygEditorMode ) {
1110                    final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE );
1111                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
1112                                                                        m_context.getRealPage().getWiki(),
1113                                                                        m_context.getRealPage().getName(),
1114                                                                        e.getMessage() ) ) );
1115                }
1116            }
1117
1118            return m_currentElement;
1119        }
1120
1121        try {
1122            final LinkParser.Link link = m_linkParser.parse( linktext );
1123            linktext = link.getText();
1124            String linkref = link.getReference();
1125
1126            //
1127            //  Yes, we now have the components separated.
1128            //  linktext = the text the link should have
1129            //  linkref  = the url or page name.
1130            //
1131            //  In many cases these are the same.  [linktext|linkref].
1132            //
1133            if( m_linkParsingOperations.isVariableLink( linktext ) ) {
1134                final Content el = new VariableContent( linktext );
1135
1136                addElement( el );
1137            } else if( m_linkParsingOperations.isExternalLink( linkref ) ) {
1138                // It's an external link, out of this Wiki
1139
1140                callMutatorChain( m_externalLinkMutatorChain, linkref );
1141
1142                if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
1143                    handleImageLink( linkref, linktext, link.hasReference() );
1144                } else {
1145                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1146                    addElement( outlinkImage() );
1147                }
1148            } else if( link.isInterwikiLink() ) {
1149                // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved.
1150
1151                // FIXME: There is an interesting issue here:  We probably should
1152                //        URLEncode the wikiPage, but we can't since some of the
1153                //        Wikis use slashes (/), which won't survive URLEncoding.
1154                //        Besides, we don't know which character set the other Wiki
1155                //        is using, so you'll have to write the entire name as it appears
1156                //        in the URL.  Bugger.
1157
1158                final String extWiki = link.getExternalWiki();
1159                final String wikiPage = link.getExternalWikiPage();
1160
1161                if( m_wysiwygEditorMode ) {
1162                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1163                } else {
1164                    String urlReference = m_engine.getInterWikiURL( extWiki );
1165
1166                    if( urlReference != null ) {
1167                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1168                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1169
1170                        if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) {
1171                            handleImageLink( urlReference, linktext, link.hasReference() );
1172                        } else {
1173                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1174                        }
1175
1176                        if( m_linkParsingOperations.isExternalLink( urlReference ) ) {
1177                            addElement( outlinkImage() );
1178                        }
1179                    } else {
1180                        final Object[] args = { escapeHTMLEntities( extWiki ) };
1181
1182                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1183                    }
1184                }
1185            } else if( linkref.startsWith( "#" ) ) {
1186                // It defines a local footnote
1187                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1188            } else if( TextUtil.isNumber( linkref ) ) {
1189                // It defines a reference to a local footnote
1190                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1191            } else {
1192                final int hashMark;
1193
1194                // Internal wiki link, but is it an attachment link?
1195                String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref );
1196                if( attachment != null ) {
1197                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1198
1199                    if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
1200                        attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment );
1201                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1202                    } else {
1203                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1204                    }
1205                } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) {
1206                    // It's an internal Wiki link, but to a named section
1207
1208                    final String namedSection = linkref.substring( hashMark + 1 );
1209                    linkref = linkref.substring( 0, hashMark );
1210
1211                    linkref = MarkupParser.cleanLink( linkref );
1212
1213                    callMutatorChain( m_localLinkMutatorChain, linkref );
1214
1215                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1216                    if( matchedLink != null ) {
1217                        String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) );
1218                        sectref = sectref.replace( '%', '_' );
1219                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1220                    } else {
1221                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1222                    }
1223                } else {
1224                    // It's an internal Wiki link
1225                    linkref = MarkupParser.cleanLink( linkref );
1226
1227                    callMutatorChain( m_localLinkMutatorChain, linkref );
1228
1229                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1230                    if( matchedLink != null ) {
1231                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1232                    } else {
1233                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1234                    }
1235                }
1236            }
1237
1238    } catch( final ParseException e ) {
1239        log.info( "Parser failure: ", e );
1240        final Object[] args = { e.getMessage() };
1241        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1242    }
1243        return m_currentElement;
1244    }
1245
1246    /**
1247     *  Pushes back any string that has been read.  It will obviously
1248     *  be pushed back in a reverse order.
1249     *
1250     *  @since 2.1.77
1251     */
1252    private void pushBack( final String s )
1253        throws IOException
1254    {
1255        for( int i = s.length()-1; i >= 0; i-- )
1256        {
1257            pushBack( s.charAt(i) );
1258        }
1259    }
1260
1261    private Element handleBackslash()
1262        throws IOException
1263    {
1264        final int ch = nextToken();
1265
1266        if( ch == '\\' )
1267        {
1268            final int ch2 = nextToken();
1269
1270            if( ch2 == '\\' )
1271            {
1272                pushElement( new Element("br").setAttribute("clear","all"));
1273                return popElement("br");
1274            }
1275
1276            pushBack( ch2 );
1277
1278            pushElement( new Element("br") );
1279            return popElement("br");
1280        }
1281
1282        pushBack( ch );
1283
1284        return null;
1285    }
1286
1287    private Element handleUnderscore()
1288        throws IOException
1289    {
1290        final int ch = nextToken();
1291        Element el = null;
1292
1293        if( ch == '_' )
1294        {
1295            if( m_isbold )
1296            {
1297                el = popElement("b");
1298            }
1299            else
1300            {
1301                el = pushElement( new Element("b") );
1302            }
1303            m_isbold = !m_isbold;
1304        }
1305        else
1306        {
1307            pushBack( ch );
1308        }
1309
1310        return el;
1311    }
1312
1313
1314    /**
1315     *  For example: italics.
1316     */
1317    private Element handleApostrophe()
1318        throws IOException
1319    {
1320        final int ch = nextToken();
1321        Element el = null;
1322
1323        if( ch == '\'' )
1324        {
1325            if( m_isitalic )
1326            {
1327                el = popElement("i");
1328            }
1329            else
1330            {
1331                el = pushElement( new Element("i") );
1332            }
1333            m_isitalic = !m_isitalic;
1334        }
1335        else
1336        {
1337            pushBack( ch );
1338        }
1339
1340        return el;
1341    }
1342
1343    private Element handleOpenbrace( final boolean isBlock )
1344        throws IOException
1345    {
1346        final int ch = nextToken();
1347
1348        if( ch == '{' )
1349        {
1350            final int ch2 = nextToken();
1351
1352            if( ch2 == '{' )
1353            {
1354                m_isPre = true;
1355                m_isEscaping = true;
1356                m_isPreBlock = isBlock;
1357
1358                if( isBlock )
1359                {
1360                    startBlockLevel();
1361                    return pushElement( new Element("pre") );
1362                }
1363
1364                return pushElement( new Element("span").setAttribute("class","inline-code") );
1365            }
1366
1367            pushBack( ch2 );
1368
1369            return pushElement( new Element("tt") );
1370        }
1371
1372        pushBack( ch );
1373
1374        return null;
1375    }
1376
1377    /**
1378     *  Handles both }} and }}}
1379     */
1380    private Element handleClosebrace()
1381        throws IOException
1382    {
1383        final int ch2 = nextToken();
1384
1385        if( ch2 == '}' )
1386        {
1387            final int ch3 = nextToken();
1388
1389            if( ch3 == '}' )
1390            {
1391                if( m_isPre )
1392                {
1393                    if( m_isPreBlock )
1394                    {
1395                        popElement( "pre" );
1396                    }
1397                    else
1398                    {
1399                        popElement( "span" );
1400                    }
1401
1402                    m_isPre = false;
1403                    m_isEscaping = false;
1404                    return m_currentElement;
1405                }
1406
1407                m_plainTextBuf.append("}}}");
1408                return m_currentElement;
1409            }
1410
1411            pushBack( ch3 );
1412
1413            if( !m_isEscaping )
1414            {
1415                return popElement("tt");
1416            }
1417        }
1418
1419        pushBack( ch2 );
1420
1421        return null;
1422    }
1423
1424    private Element handleDash()
1425        throws IOException
1426    {
1427        int ch = nextToken();
1428
1429        if( ch == '-' )
1430        {
1431            final int ch2 = nextToken();
1432
1433            if( ch2 == '-' )
1434            {
1435                final int ch3 = nextToken();
1436
1437                if( ch3 == '-' )
1438                {
1439                    // Empty away all the rest of the dashes.
1440                    // Do not forget to return the first non-match back.
1441                    do
1442                    {
1443                        ch = nextToken();
1444                    }
1445                    while ( ch == '-' );
1446
1447                    pushBack(ch);
1448                    startBlockLevel();
1449                    pushElement( new Element("hr") );
1450                    return popElement( "hr" );
1451                }
1452
1453                pushBack( ch3 );
1454            }
1455            pushBack( ch2 );
1456        }
1457
1458        pushBack( ch );
1459
1460        return null;
1461    }
1462
1463    private Element handleHeading()
1464        throws IOException
1465    {
1466        Element el = null;
1467
1468        final int ch  = nextToken();
1469
1470        final Heading hd = new Heading();
1471
1472        if( ch == '!' )
1473        {
1474            final int ch2 = nextToken();
1475
1476            if( ch2 == '!' )
1477            {
1478                final String title = peekAheadLine();
1479
1480                el = makeHeading( Heading.HEADING_LARGE, title, hd);
1481            }
1482            else
1483            {
1484                pushBack( ch2 );
1485                final String title = peekAheadLine();
1486                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1487            }
1488        }
1489        else
1490        {
1491            pushBack( ch );
1492            final String title = peekAheadLine();
1493            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1494        }
1495
1496        callHeadingListenerChain( hd );
1497
1498        m_lastHeading = hd;
1499
1500        if( el != null ) pushElement(el);
1501
1502        return el;
1503    }
1504
1505    /**
1506     *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1507     *  EOL from the stream.
1508     */
1509    private StringBuilder readUntilEOL()
1510        throws IOException
1511    {
1512        int ch;
1513        final StringBuilder buf = new StringBuilder( 256 );
1514
1515        while( true )
1516        {
1517            ch = nextToken();
1518
1519            if( ch == -1 )
1520                break;
1521
1522            buf.append( (char) ch );
1523
1524            if( ch == '\n' )
1525                break;
1526        }
1527        return buf;
1528    }
1529
1530    /** Controls whether italic is restarted after a paragraph shift */
1531
1532    private boolean m_restartitalic = false;
1533    private boolean m_restartbold   = false;
1534
1535    private boolean m_newLine;
1536
1537    /**
1538     *  Starts a block level element, therefore closing
1539     *  a potential open paragraph tag.
1540     */
1541    private void startBlockLevel()
1542    {
1543        // These may not continue over block level limits in XHTML
1544
1545        popElement("i");
1546        popElement("b");
1547        popElement("tt");
1548
1549        if( m_isOpenParagraph )
1550        {
1551            m_isOpenParagraph = false;
1552            popElement("p");
1553            m_plainTextBuf.append("\n"); // Just small beautification
1554        }
1555
1556        m_restartitalic = m_isitalic;
1557        m_restartbold   = m_isbold;
1558
1559        m_isitalic = false;
1560        m_isbold   = false;
1561    }
1562
1563    private static String getListType( final char c )
1564    {
1565        if( c == '*' )
1566        {
1567            return "ul";
1568        }
1569        else if( c == '#' )
1570        {
1571            return "ol";
1572        }
1573        throw new InternalWikiException("Parser got faulty list type: "+c);
1574    }
1575    /**
1576     *  Like original handleOrderedList() and handleUnorderedList()
1577     *  however handles both ordered ('#') and unordered ('*') mixed together.
1578     */
1579
1580    // FIXME: Refactor this; it's a bit messy.
1581
1582    private Element handleGeneralList()
1583        throws IOException
1584    {
1585         startBlockLevel();
1586
1587         String strBullets = readWhile( "*#" );
1588         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1589         final int numBullets = strBullets.length();
1590
1591         // override the beginning portion of bullet pattern to be like the previous
1592         // to simulate PHPWiki style lists
1593
1594         if(m_allowPHPWikiStyleLists)
1595         {
1596             // only substitute if different
1597             if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1598                   (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
1599             {
1600                 if(numBullets <= m_genlistlevel)
1601                 {
1602                     // Substitute all but the last character (keep the expressed bullet preference)
1603                     strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
1604                                   + strBullets.substring(numBullets-1, numBullets);
1605                 }
1606                 else
1607                 {
1608                     strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
1609                 }
1610             }
1611         }
1612
1613         //
1614         //  Check if this is still of the same type
1615         //
1616         if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1617            (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
1618         {
1619             if( numBullets > m_genlistlevel )
1620             {
1621                 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );
1622
1623                 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
1624                 {
1625                     // bullets are growing, get from new bullet list
1626                     pushElement( new Element("li") );
1627                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
1628                 }
1629             }
1630             else if( numBullets < m_genlistlevel )
1631             {
1632                 //  Close the previous list item.
1633                 // buf.append( m_renderer.closeListItem() );
1634                 popElement( "li" );
1635
1636                 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
1637                 {
1638                     // bullets are shrinking, get from old bullet list
1639
1640                     popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1641                     if( m_genlistlevel > 0 )
1642                     {
1643                         popElement( "li" );
1644                     }
1645
1646                 }
1647             }
1648             else
1649             {
1650                 if( m_genlistlevel > 0 )
1651                 {
1652                     popElement( "li" );
1653                 }
1654             }
1655         }
1656         else
1657         {
1658             //
1659             //  The pattern has changed, unwind and restart
1660             //
1661             int  numEqualBullets;
1662             final int  numCheckBullets;
1663
1664             // find out how much is the same
1665             numEqualBullets = 0;
1666             numCheckBullets = Math.min(numBullets,m_genlistlevel);
1667
1668             while( numEqualBullets < numCheckBullets )
1669             {
1670                 // if the bullets are equal so far, keep going
1671                 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
1672                     numEqualBullets++;
1673                 // otherwise giveup, we have found how many are equal
1674                 else
1675                     break;
1676             }
1677
1678             //unwind
1679             for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
1680             {
1681                 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
1682                 if( m_genlistlevel > numBullets )
1683                 {
1684                     popElement("li");
1685                 }
1686             }
1687
1688             //rewind
1689
1690             pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
1691             for(int i = numEqualBullets; i < numBullets; i++)
1692             {
1693                 pushElement( new Element("li") );
1694                 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
1695             }
1696             m_genlistlevel = numBullets;
1697         }
1698
1699         //
1700         //  Push a new list item, and eat away any extra whitespace
1701         //
1702         pushElement( new Element("li") );
1703         readWhile(" ");
1704
1705         // work done, remember the new bullet list (in place of old one)
1706         m_genlistBulletBuffer.setLength(0);
1707         m_genlistBulletBuffer.append(strBullets);
1708
1709         return m_currentElement;
1710    }
1711
1712    private Element unwindGeneralList()
1713    {
1714        //unwind
1715        for( ; m_genlistlevel > 0; m_genlistlevel-- )
1716        {
1717            popElement( "li" );
1718            popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1719        }
1720
1721        m_genlistBulletBuffer.setLength(0);
1722
1723        return null;
1724    }
1725
1726
1727    private Element handleDefinitionList()
1728        throws IOException
1729    {
1730        if( !m_isdefinition )
1731        {
1732            m_isdefinition = true;
1733
1734            startBlockLevel();
1735
1736            pushElement( new Element("dl") );
1737            return pushElement( new Element("dt") );
1738        }
1739
1740        return null;
1741    }
1742
1743    private Element handleOpenbracket()
1744        throws IOException
1745    {
1746        final StringBuilder sb = new StringBuilder(40);
1747        final int pos = getPosition();
1748        int ch = nextToken();
1749        boolean isPlugin = false;
1750
1751        if( ch == '[' )
1752        {
1753            if( m_wysiwygEditorMode )
1754            {
1755                sb.append( '[' );
1756            }
1757
1758            sb.append( (char)ch );
1759
1760            while( (ch = nextToken()) == '[' )
1761            {
1762                sb.append( (char)ch );
1763            }
1764        }
1765
1766
1767        if( ch == '{' )
1768        {
1769            isPlugin = true;
1770        }
1771
1772        pushBack( ch );
1773
1774        if( sb.length() > 0 )
1775        {
1776            m_plainTextBuf.append( sb );
1777            return m_currentElement;
1778        }
1779
1780        //
1781        //  Find end of hyperlink
1782        //
1783
1784        ch = nextToken();
1785        int nesting = 1;    // Check for nested plugins
1786
1787        while( ch != -1 )
1788        {
1789            final int ch2 = nextToken(); pushBack(ch2);
1790
1791            if( isPlugin )
1792            {
1793                if( ch == '[' && ch2 == '{' )
1794                {
1795                    nesting++;
1796                }
1797                else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
1798                {
1799                    break;
1800                }
1801                else if( ch == '}' && ch2 == ']' )
1802                {
1803                    // NB: This will be decremented once at the end
1804                    nesting--;
1805                }
1806            }
1807            else
1808            {
1809                if( ch == ']' )
1810                {
1811                    break;
1812                }
1813            }
1814
1815            sb.append( (char) ch );
1816
1817            ch = nextToken();
1818        }
1819
1820        //
1821        //  If the link is never finished, do some tricks to display the rest of the line
1822        //  unchanged.
1823        //
1824        if( ch == -1 )
1825        {
1826            log.debug("Warning: unterminated link detected!");
1827            m_isEscaping = true;
1828            m_plainTextBuf.append( sb );
1829            flushPlainText();
1830            m_isEscaping = false;
1831            return m_currentElement;
1832        }
1833
1834        return handleHyperlinks( sb.toString(), pos );
1835    }
1836
1837    /**
1838     *  Reads the stream until the current brace is closed or stream end.
1839     */
1840    private String readBraceContent( final char opening, final char closing )
1841        throws IOException
1842    {
1843        final StringBuilder sb = new StringBuilder(40);
1844        int braceLevel = 1;
1845        int ch;
1846        while(( ch = nextToken() ) != -1 )
1847        {
1848            if( ch == '\\' )
1849            {
1850                continue;
1851            }
1852            else if ( ch == opening )
1853            {
1854                braceLevel++;
1855            }
1856            else if ( ch == closing )
1857            {
1858                braceLevel--;
1859                if (braceLevel==0)
1860                {
1861                  break;
1862                }
1863            }
1864            sb.append( (char)ch );
1865        }
1866        return sb.toString();
1867    }
1868
1869
1870    /**
1871     *  Handles constructs of type %%(style) and %%class
1872     * @param newLine
1873     * @return An Element containing the div or span, depending on the situation.
1874     * @throws IOException
1875     */
1876    private Element handleDiv( final boolean newLine )
1877        throws IOException
1878    {
1879        int ch = nextToken();
1880        Element el = null;
1881
1882        if( ch == '%' )
1883        {
1884            String style = null;
1885            String clazz = null;
1886
1887            ch = nextToken();
1888
1889            //
1890            //  Style or class?
1891            //
1892            if( ch == '(' )
1893            {
1894                style = readBraceContent('(',')');
1895            }
1896            else if( Character.isLetter( (char) ch ) )
1897            {
1898                pushBack( ch );
1899                clazz = readUntil( "( \t\n\r" );
1900                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
1901                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
1902                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
1903                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
1904
1905                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
1906                //(2) remove any invalid character
1907                if( clazz != null){
1908
1909                    clazz = clazz.replace('.', ' ')
1910                                 .replaceAll("[^\\s-_\\w\\x200-\\x377]+","");
1911
1912                }
1913                ch = nextToken();
1914
1915                //check for %%class1.class2( style information )
1916                if( ch == '(' )
1917                {
1918                    style = readBraceContent('(',')');
1919                }
1920                //
1921                //  Pop out only spaces, so that the upcoming EOL check does not check the
1922                //  next line.
1923                //
1924                else if( ch == '\n' || ch == '\r' )
1925                {
1926                    pushBack(ch);
1927                }
1928            }
1929            else
1930            {
1931                //
1932                // Anything else stops.
1933                //
1934
1935                pushBack(ch);
1936
1937                try
1938                {
1939                    final Boolean isSpan = m_styleStack.pop();
1940
1941                    if( isSpan == null )
1942                    {
1943                        // Fail quietly
1944                    }
1945                    else if( isSpan.booleanValue() )
1946                    {
1947                        el = popElement( "span" );
1948                    }
1949                    else
1950                    {
1951                        el = popElement( "div" );
1952                    }
1953                }
1954                catch( final EmptyStackException e )
1955                {
1956                    log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
1957                    return m_currentElement;
1958                }
1959
1960                return el;
1961            }
1962
1963            //
1964            //  Check if there is an attempt to do something nasty
1965            //
1966
1967            try
1968            {
1969                style = StringEscapeUtils.unescapeHtml4(style);
1970                if( style != null && style.indexOf("javascript:") != -1 )
1971                {
1972                    log.debug("Attempt to output javascript within CSS:"+style);
1973                    final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1974                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
1975                }
1976            }
1977            catch( final NumberFormatException e )
1978            {
1979                //
1980                //  If there are unknown entities, we don't want the parser to stop.
1981                //
1982                final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1983                final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
1984                return addElement( makeError( msg ) );
1985            }
1986
1987            //
1988            //  Decide if we should open a div or a span?
1989            //
1990            final String eol = peekAheadLine();
1991
1992            if( eol.trim().length() > 0 )
1993            {
1994                // There is stuff after the class
1995
1996                el = new Element("span");
1997
1998                m_styleStack.push( Boolean.TRUE );
1999            }
2000            else
2001            {
2002                startBlockLevel();
2003                el = new Element("div");
2004                m_styleStack.push( Boolean.FALSE );
2005            }
2006
2007            if( style != null ) el.setAttribute("style", style);
2008            if( clazz != null ) el.setAttribute("class", clazz);
2009            el = pushElement( el );
2010
2011            return el;
2012        }
2013
2014        pushBack(ch);
2015
2016        return el;
2017    }
2018
2019    private Element handleSlash( final boolean newLine )
2020        throws IOException
2021    {
2022        final int ch = nextToken();
2023
2024        pushBack(ch);
2025        if( ch == '%' && !m_styleStack.isEmpty() )
2026        {
2027            return handleDiv( newLine );
2028        }
2029
2030        return null;
2031    }
2032
2033    private Element handleBar( final boolean newLine )
2034        throws IOException
2035    {
2036        Element el = null;
2037
2038        if( !m_istable && !newLine )
2039        {
2040            return null;
2041        }
2042
2043        //
2044        //  If the bar is in the first column, we will either start
2045        //  a new table or continue the old one.
2046        //
2047
2048        if( newLine )
2049        {
2050            if( !m_istable )
2051            {
2052                startBlockLevel();
2053                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2054                m_istable = true;
2055                m_rowNum = 0;
2056            }
2057
2058            m_rowNum++;
2059            final Element tr = ( m_rowNum % 2 != 0 )
2060                       ? new Element("tr").setAttribute("class", "odd")
2061                       : new Element("tr");
2062            el = pushElement( tr );
2063        }
2064
2065        //
2066        //  Check out which table cell element to start;
2067        //  a header element (th) or a regular element (td).
2068        //
2069        final int ch = nextToken();
2070
2071        if( ch == '|' )
2072        {
2073            if( !newLine )
2074            {
2075                el = popElement("th");
2076                if( el == null ) popElement("td");
2077            }
2078            el = pushElement( new Element("th") );
2079        }
2080        else
2081        {
2082            if( !newLine )
2083            {
2084                el = popElement("td");
2085                if( el == null ) popElement("th");
2086            }
2087
2088            el = pushElement( new Element("td") );
2089
2090            pushBack( ch );
2091        }
2092
2093        return el;
2094    }
2095
2096    /**
2097     *  Generic escape of next character or entity.
2098     */
2099    private Element handleTilde()
2100        throws IOException
2101    {
2102        final int ch = nextToken();
2103
2104        if( ch == ' ' )
2105        {
2106            if( m_wysiwygEditorMode )
2107            {
2108                m_plainTextBuf.append( "~ " );
2109            }
2110            return m_currentElement;
2111        }
2112
2113        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2114            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2115            ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2116        {
2117            if( m_wysiwygEditorMode )
2118            {
2119                m_plainTextBuf.append( '~' );
2120            }
2121
2122            m_plainTextBuf.append( (char)ch );
2123            m_plainTextBuf.append(readWhile( ""+(char)ch ));
2124            return m_currentElement;
2125        }
2126
2127        // No escape.
2128        pushBack( ch );
2129
2130        return null;
2131    }
2132
2133    private void fillBuffer( final Element startElement )
2134        throws IOException
2135    {
2136        m_currentElement = startElement;
2137
2138        boolean quitReading = false;
2139        m_newLine = true;
2140        disableOutputEscaping();
2141
2142        while(!quitReading)
2143        {
2144            final int ch = nextToken();
2145
2146            if( ch == -1 ) break;
2147
2148            //
2149            //  Check if we're actually ending the preformatted mode.
2150            //  We still must do an entity transformation here.
2151            //
2152            if( m_isEscaping )
2153            {
2154                if( ch == '}' )
2155                {
2156                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2157                }
2158                else if( ch == -1 )
2159                {
2160                    quitReading = true;
2161                }
2162                else if( ch == '\r' )
2163                {
2164                    // DOS line feeds we ignore.
2165                }
2166                else if( ch == '<' )
2167                {
2168                    m_plainTextBuf.append( "&lt;" );
2169                }
2170                else if( ch == '>' )
2171                {
2172                    m_plainTextBuf.append( "&gt;" );
2173                }
2174                else if( ch == '&' )
2175                {
2176                    m_plainTextBuf.append( "&amp;" );
2177                }
2178                else if( ch == '~' )
2179                {
2180                    String braces = readWhile("}");
2181                    if( braces.length() >= 3 )
2182                    {
2183                        m_plainTextBuf.append("}}}");
2184
2185                        braces = braces.substring(3);
2186                    }
2187                    else
2188                    {
2189                        m_plainTextBuf.append( (char) ch );
2190                    }
2191
2192                    for( int i = braces.length()-1; i >= 0; i-- )
2193                    {
2194                        pushBack(braces.charAt(i));
2195                    }
2196                }
2197                else
2198                {
2199                    m_plainTextBuf.append( (char) ch );
2200                }
2201
2202                continue;
2203            }
2204
2205            //
2206            //  An empty line stops a list
2207            //
2208            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2209            {
2210                m_plainTextBuf.append(unwindGeneralList());
2211            }
2212
2213            if( m_newLine && ch != '|' && m_istable )
2214            {
2215                popElement("table");
2216                m_istable = false;
2217            }
2218
2219            int skip = IGNORE;
2220
2221            //
2222            //  Do the actual parsing and catch any errors.
2223            //
2224            try
2225            {
2226                skip = parseToken( ch );
2227            }
2228            catch( final IllegalDataException e )
2229            {
2230                log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2231
2232                makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2233            }
2234
2235            //
2236            //   The idea is as follows:  If the handler method returns
2237            //   an element (el != null), it is assumed that it has been
2238            //   added in the stack.  Otherwise the character is added
2239            //   as is to the plaintext buffer.
2240            //
2241            //   For the transition phase, if s != null, it also gets
2242            //   added in the plaintext buffer.
2243            //
2244
2245            switch( skip )
2246            {
2247                case ELEMENT:
2248                    m_newLine = false;
2249                    break;
2250
2251                case CHARACTER:
2252                    m_plainTextBuf.append( (char) ch );
2253                    m_newLine = false;
2254                    break;
2255
2256                case IGNORE:
2257                default:
2258                    break;
2259            }
2260        }
2261
2262        closeHeadings();
2263        popElement("domroot");
2264    }
2265
2266    private String cleanupSuspectData( final String s )
2267    {
2268        final StringBuilder sb = new StringBuilder( s.length() );
2269
2270        for( int i = 0; i < s.length(); i++ )
2271        {
2272            final char c = s.charAt(i);
2273
2274            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2275            else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2276        }
2277
2278        return sb.toString();
2279    }
2280
2281    /** The token is a plain character. */
2282    protected static final int CHARACTER = 0;
2283
2284    /** The token is a wikimarkup element. */
2285    protected static final int ELEMENT   = 1;
2286
2287    /** The token is to be ignored. */
2288    protected static final int IGNORE    = 2;
2289
2290    /**
2291     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
2292     *  you think this was a wiki markup element, and IGNORE, if you think
2293     *  we should ignore this altogether.
2294     *  <p>
2295     *  To add your own MarkupParser, you can override this method, but it
2296     *  is recommended that you call super.parseToken() as well to gain advantage
2297     *  of JSPWiki's own markup.  You can call it at the start of your own
2298     *  parseToken() or end - it does not matter.
2299     *
2300     * @param ch The character under investigation
2301     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2302     * @throws IOException If parsing fails.
2303     */
2304    protected int parseToken( final int ch )
2305        throws IOException
2306    {
2307        Element el = null;
2308
2309        //
2310        //  Now, check the incoming token.
2311        //
2312        switch( ch )
2313        {
2314          case '\r':
2315            // DOS linefeeds we forget
2316            return IGNORE;
2317
2318          case '\n':
2319            //
2320            //  Close things like headings, etc.
2321            //
2322
2323            // FIXME: This is not really very fast
2324
2325            closeHeadings();
2326
2327            popElement("dl"); // Close definition lists.
2328            if( m_istable )
2329            {
2330                popElement("tr");
2331            }
2332
2333            m_isdefinition = false;
2334
2335            if( m_newLine )
2336            {
2337                // Paragraph change.
2338                startBlockLevel();
2339
2340                //
2341                //  Figure out which elements cannot be enclosed inside
2342                //  a <p></p> pair according to XHTML rules.
2343                //
2344                final String nextLine = peekAheadLine();
2345                if( nextLine.length() == 0 ||
2346                    (nextLine.length() > 0 &&
2347                     !nextLine.startsWith("{{{") &&
2348                     !nextLine.startsWith("----") &&
2349                     !nextLine.startsWith("%%") &&
2350                     "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2351                {
2352                    pushElement( new Element("p") );
2353                    m_isOpenParagraph = true;
2354
2355                    if( m_restartitalic )
2356                    {
2357                        pushElement( new Element("i") );
2358                        m_isitalic = true;
2359                        m_restartitalic = false;
2360                    }
2361                    if( m_restartbold )
2362                    {
2363                        pushElement( new Element("b") );
2364                        m_isbold = true;
2365                        m_restartbold = false;
2366                    }
2367                }
2368            }
2369            else
2370            {
2371                m_plainTextBuf.append("\n");
2372                m_newLine = true;
2373            }
2374            return IGNORE;
2375
2376
2377          case '\\':
2378            el = handleBackslash();
2379            break;
2380
2381          case '_':
2382            el = handleUnderscore();
2383            break;
2384
2385          case '\'':
2386            el = handleApostrophe();
2387            break;
2388
2389          case '{':
2390            el = handleOpenbrace( m_newLine );
2391            break;
2392
2393          case '}':
2394            el = handleClosebrace();
2395            break;
2396
2397          case '-':
2398            if( m_newLine )
2399                el = handleDash();
2400
2401            break;
2402
2403          case '!':
2404            if( m_newLine )
2405            {
2406                el = handleHeading();
2407            }
2408            break;
2409
2410          case ';':
2411            if( m_newLine )
2412            {
2413                el = handleDefinitionList();
2414            }
2415            break;
2416
2417          case ':':
2418            if( m_isdefinition )
2419            {
2420                popElement("dt");
2421                el = pushElement( new Element("dd") );
2422                m_isdefinition = false;
2423            }
2424            break;
2425
2426          case '[':
2427            el = handleOpenbracket();
2428            break;
2429
2430          case '*':
2431            if( m_newLine )
2432            {
2433                pushBack('*');
2434                el = handleGeneralList();
2435            }
2436            break;
2437
2438          case '#':
2439            if( m_newLine )
2440            {
2441                pushBack('#');
2442                el = handleGeneralList();
2443            }
2444            break;
2445
2446          case '|':
2447            el = handleBar( m_newLine );
2448            break;
2449
2450          case '~':
2451            el = handleTilde();
2452            break;
2453
2454          case '%':
2455            el = handleDiv( m_newLine );
2456            break;
2457
2458          case '/':
2459            el = handleSlash( m_newLine );
2460            break;
2461
2462          default:
2463            break;
2464        }
2465
2466        return el != null ? ELEMENT : CHARACTER;
2467    }
2468
2469    private void closeHeadings()
2470    {
2471        if( m_lastHeading != null && !m_wysiwygEditorMode )
2472        {
2473            // Add the hash anchor element at the end of the heading
2474            addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2475            m_lastHeading = null;
2476        }
2477        popElement("h2");
2478        popElement("h3");
2479        popElement("h4");
2480    }
2481
2482    /**
2483     *  Parses the entire document from the Reader given in the constructor or
2484     *  set by {@link #setInputReader(Reader)}.
2485     *
2486     *  @return A WikiDocument, ready to be passed to the renderer.
2487     *  @throws IOException If parsing cannot be accomplished.
2488     */
2489    @Override
2490    public WikiDocument parse()
2491        throws IOException
2492    {
2493        final WikiDocument d = new WikiDocument( m_context.getPage() );
2494        d.setContext( m_context );
2495
2496        final Element rootElement = new Element("domroot");
2497
2498        d.setRootElement( rootElement );
2499
2500        fillBuffer( rootElement );
2501
2502        paragraphify(rootElement);
2503
2504        return d;
2505    }
2506
2507    /**
2508     *  Checks out that the first paragraph is correctly installed.
2509     *
2510     *  @param rootElement
2511     */
2512    private void paragraphify( final Element rootElement)
2513    {
2514        //
2515        //  Add the paragraph tag to the first paragraph
2516        //
2517        final List< Content > kids = rootElement.getContent();
2518
2519        if( rootElement.getChild("p") != null )
2520        {
2521            final ArrayList<Content> ls = new ArrayList<>();
2522            int idxOfFirstContent = 0;
2523            int count = 0;
2524
2525            for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ )
2526            {
2527                final Content c = i.next();
2528                if( c instanceof Element )
2529                {
2530                    final String name = ( ( Element )c ).getName();
2531                    if( isBlockLevel( name ) ) break;
2532                }
2533
2534                if( !(c instanceof ProcessingInstruction) )
2535                {
2536                    ls.add( c );
2537                    if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2538                }
2539            }
2540
2541            //
2542            //  If there were any elements, then add a new <p> (unless it would
2543            //  be an empty one)
2544            //
2545            if( ls.size() > 0 )
2546            {
2547                final Element newel = new Element("p");
2548
2549                for( final Iterator< Content > i = ls.iterator(); i.hasNext(); )
2550                {
2551                    final Content c = i.next();
2552
2553                    c.detach();
2554                    newel.addContent(c);
2555                }
2556
2557                //
2558                // Make sure there are no empty <p/> tags added.
2559                //
2560                if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2561                    rootElement.addContent(idxOfFirstContent, newel);
2562            }
2563        }
2564    }
2565
2566}