001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import java.io.IOException;
022import java.io.Reader;
023import java.io.StringReader;
024import java.text.MessageFormat;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.EmptyStackException;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Map;
033import java.util.Properties;
034import java.util.ResourceBundle;
035import java.util.Stack;
036
037import javax.xml.transform.Result;
038
039import org.apache.commons.lang.StringEscapeUtils;
040import org.apache.commons.lang.StringUtils;
041import org.apache.log4j.Logger;
042import org.apache.oro.text.regex.MalformedPatternException;
043import org.apache.oro.text.regex.MatchResult;
044import org.apache.oro.text.regex.Pattern;
045import org.apache.oro.text.regex.PatternCompiler;
046import org.apache.oro.text.regex.PatternMatcher;
047import org.apache.oro.text.regex.Perl5Compiler;
048import org.apache.oro.text.regex.Perl5Matcher;
049import org.apache.wiki.InternalWikiException;
050import org.apache.wiki.StringTransmutator;
051import org.apache.wiki.WikiContext;
052import org.apache.wiki.WikiPage;
053import org.apache.wiki.api.exceptions.PluginException;
054import org.apache.wiki.api.plugin.WikiPlugin;
055import org.apache.wiki.auth.WikiSecurityException;
056import org.apache.wiki.auth.acl.Acl;
057import org.apache.wiki.i18n.InternationalizationManager;
058import org.apache.wiki.preferences.Preferences;
059import org.apache.wiki.render.CleanTextRenderer;
060import org.apache.wiki.render.RenderingManager;
061import org.apache.wiki.util.TextUtil;
062import org.jdom2.Attribute;
063import org.jdom2.Content;
064import org.jdom2.Element;
065import org.jdom2.IllegalDataException;
066import org.jdom2.ProcessingInstruction;
067import org.jdom2.Verifier;
068
069/**
070 *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
071 *  heart and soul of JSPWiki : make sure you test properly anything that is added,
072 *  or else it breaks down horribly.
073 *
074 *  @since  2.4
075 */
076public class JSPWikiMarkupParser extends MarkupParser {
077
    //
    //  Link types understood by makeLink().  The comments describe what
    //  makeLink() emits for each type.
    //

    /** Anchor pointing at the VIEW URL of the linked page. */
    protected static final int              READ          = 0;
    /** Anchor pointing at the EDIT URL of the linked page, with a "create" title. */
    protected static final int              EDIT          = 1;
    /** Link with an empty target; rendered as underlined text only. */
    protected static final int              EMPTY         = 2;  // Empty message
    /** Footnote target: a named anchor ("ref-&lt;context name&gt;-..."). */
    protected static final int              LOCAL         = 3;
    /** Reference to a footnote: an anchor to "#ref-&lt;context name&gt;-...". */
    protected static final int              LOCALREF      = 4;
    /** Inline image (an img element). */
    protected static final int              IMAGE         = 5;
    /** Anchor to the link exactly as given; may get rel="nofollow". */
    protected static final int              EXTERNAL      = 6;
    /** Anchor for an interwiki reference; the link is used as given. */
    protected static final int              INTERWIKI     = 7;
    /** Inline image wrapped in an anchor whose href is the link text. */
    protected static final int              IMAGELINK     = 8;
    /** Inline image wrapped in an anchor to the VIEW URL of the page named by the link text. */
    protected static final int              IMAGEWIKILINK = 9;
    /** Anchor to an attachment, optionally followed by a small "info" image link. */
    protected static final int              ATTACHMENT    = 10;

    private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );

    //  Parser state flags.  They are not read in the part of the class that
    //  configures the parser; presumably they are maintained by the main
    //  parsing loop -- verify there before relying on them.
    private boolean        m_isbold       = false;
    private boolean        m_isitalic     = false;
    private boolean        m_istable      = false;
    private boolean        m_isPre        = false;
    /** While set, flushPlainText() performs no CamelCase/URI link detection. */
    private boolean        m_isEscaping   = false;
    private boolean        m_isdefinition = false;
    private boolean        m_isPreBlock   = false;

    /** Contains style information, in multiple forms. */
    private Stack<Boolean> m_styleStack   = new Stack<>();

    // general list handling
    private int            m_genlistlevel = 0;
    private StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
    private boolean        m_allowPHPWikiStyleLists = true;

    private boolean        m_isOpenParagraph = false;

    /** Parser for extended link functionality. */
    private LinkParser     m_linkParser = new LinkParser();

    /** Keeps track of any plain text that gets put in the Text nodes */
    private StringBuilder  m_plainTextBuf = new StringBuilder(20);

    /** The element that new content is currently appended to. */
    private Element        m_currentElement;

    /** Keep track of duplicate header names.  */
    private Map<String, Integer>   m_titleSectionCounter = new HashMap<>();

    /** If true, consider CamelCase hyperlinks as well. */
    public static final String     PROP_CAMELCASELINKS   = "jspwiki.translatorReader.camelCaseLinks";

    /** If true, all hyperlinks are translated as well, regardless whether they
        are surrounded by brackets. */
    public static final String     PROP_PLAINURIS        = "jspwiki.translatorReader.plainUris";

    /** If true, all outward attachment info links have a small link image appended. */
    public static final String     PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

    /** If true, then considers CamelCase links as well. */
    private boolean                m_camelCaseLinks      = false;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean                m_wysiwygEditorMode     = false;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean                m_plainUris           = false;

    /** If true, all outward links use a small link image. */
    private boolean                m_useOutlinkImage     = true;

    /** If true, attachment links are followed by a small image linking to the attachment info page. */
    private boolean                m_useAttachmentImage  = true;

    /** If true, allows raw HTML. */
    private boolean                m_allowHTML           = false;

    /** If true, external links get a rel="nofollow" attribute. */
    private boolean                m_useRelNofollow      = false;

    /** Compiler used to build the CamelCase pattern when it is not cached in the engine yet. */
    private PatternCompiler        m_compiler = new Perl5Compiler();

    /**
     *  Regular expression used for finding bare WikiWords and URIs in plain text.
     *  Groups, as consumed by flushPlainText(): 1 = prefix (start of text or a run
     *  of non-alphanumeric characters), 2 = the CamelCase word or the whole URI,
     *  3 = the URI protocol (http://, https:// or mailto:), 4 = the rest of the URI.
     */
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    private PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();
    /** Compiled form of WIKIWORD_REGEX; set up by initialize(). */
    private Pattern                m_camelCasePattern;

    private int                    m_rowNum              = 1;

    private Heading                m_lastHeading         = null;

    /** Name of the engine attribute under which the compiled CamelCase pattern is cached. */
    private static final String CAMELCASE_PATTERN     = "JSPWikiMarkupParser.camelCasePattern";
163
    /**
     *  Creates a markup parser and configures it from the given context
     *  (see {@code initialize()}).
     *
     *  @param context The WikiContext which controls the parsing.  Its page must
     *                 not be null, as the initialization reads page attributes.
     *  @param in Where the data is read from.  This class itself passes
     *            {@code null} here when it builds its internal "clean" translator.
     */
    public JSPWikiMarkupParser( WikiContext context, Reader in )
    {
        super( context, in );
        initialize();
    }
175
176    // FIXME: parsers should be pooled for better performance.
177    private void initialize()
178    {
179        initInlineImagePatterns();
180
181        m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN );
182        if( m_camelCasePattern == null )
183        {
184            try
185            {
186                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,
187                                                         Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
188            }
189            catch( MalformedPatternException e )
190            {
191                log.fatal("Internal error: Someone put in a faulty pattern.",e);
192                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
193            }
194            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
195        }
196        //
197        //  Set the properties.
198        //
199        Properties props      = m_engine.getWikiProperties();
200
201        String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
202
203        if( cclinks != null )
204        {
205            m_camelCaseLinks = TextUtil.isPositive( cclinks );
206        }
207        else
208        {
209            m_camelCaseLinks  = TextUtil.getBooleanProperty( props,
210                                                             PROP_CAMELCASELINKS,
211                                                             m_camelCaseLinks );
212        }
213
214        Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE );
215        if( wysiwygVariable != null )
216        {
217            m_wysiwygEditorMode = wysiwygVariable.booleanValue();
218        }
219
220        m_plainUris           = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
221        m_useOutlinkImage     = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
222        m_useAttachmentImage  = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
223        m_allowHTML           = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
224        m_useRelNofollow      = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
225
226        if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null )
227        {
228            disableAccessRules();
229        }
230
231        m_context.getPage().setHasMetadata();
232    }
233
234    /**
235     *  Calls a transmutator chain.
236     *
237     *  @param list Chain to call
238     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
239     *  @return The result of the mutation.
240     */
241    protected String callMutatorChain( Collection< StringTransmutator > list, String text )
242    {
243        if( list == null || list.size() == 0 )
244        {
245            return text;
246        }
247
248        for( Iterator< StringTransmutator > i = list.iterator(); i.hasNext(); )
249        {
250            StringTransmutator m = i.next();
251
252            text = m.mutate( m_context, text );
253        }
254
255        return text;
256    }
257
258    /**
259     * Calls the heading listeners.
260     *
261     * @param param A Heading object.
262     */
263    protected void callHeadingListenerChain( Heading param )
264    {
265        List< HeadingListener > list = m_headingListenerChain;
266
267        for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); )
268        {
269            HeadingListener h = i.next();
270
271            h.headingAdded( m_context, param );
272        }
273    }
274
275    /**
276     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
277     *  if you really know what you are doing.
278     *
279     *  @param type One of the types above
280     *  @param link URL to which to link to
281     *  @param text Link text
282     *  @param section If a particular section identifier is required.
283     *  @return An A element.
284     *  @since 2.4.78
285     */
286    protected Element createAnchor(int type, String link, String text, String section)
287    {
288        text = escapeHTMLEntities( text );
289        section = escapeHTMLEntities( section );
290        Element el = new Element("a");
291        el.setAttribute("class",CLASS_TYPES[type]);
292        el.setAttribute("href",link+section);
293        el.addContent(text);
294        return el;
295    }
296
    /**
     *  Creates the element(s) for a link of the given type and appends them
     *  to the current element.
     *
     *  @param type One of the link type constants (READ, EDIT, ...).  It is
     *              forced to EMPTY when the link is an empty string.
     *  @param link The link target; must not be null.
     *  @param text The link text; when null, the link itself is used.  The text
     *              is run through the link mutator chain before use.
     *  @param section Optional section name; when non-null it is prefixed with "#".
     *  @param attributes Optional extra attributes that are set on the resulting
     *                    element; may be null.
     *  @return The element that was appended last, or null if nothing was
     *          appended by this method's tail (unknown type, or an ATTACHMENT
     *          link when attachment images are switched off).
     */
    private Element makeLink( int type, String link, String text, String section, Iterator< Attribute > attributes )
    {
        Element el = null;

        if( text == null ) text = link;

        text = callMutatorChain( m_linkMutators, text );

        section = (section != null) ? ("#"+section) : "";

        // Make sure we make a link name that can be accepted
        // as a valid URL.

        if( link.length() == 0 )
        {
            type = EMPTY;
        }
        // The bundle is only needed for the title of EDIT links below.
        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );

        switch(type)
        {
            case READ:
                el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
                break;

            case EDIT:
                // The section is deliberately not part of an edit link.
                el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );

                break;

            case EMPTY:
                el = new Element("u").addContent(text);
                break;

                //
                //  These two are for local references - footnotes and
                //  references to footnotes.
                //  We embed the page name (or whatever WikiContext gives us)
                //  to make sure the links are unique across Wiki.
                //
            case LOCALREF:
                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
                break;

            case LOCAL:
                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
                // The first character of the link is dropped -- presumably the
                // footnote marker; verify against the caller.
                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
                el.addContent("["+text+"]");
                break;

                //
                //  With the image, external and interwiki types we need to
                //  make sure nobody can put in Javascript or something else
                //  annoying into the links themselves.  We do this by preventing
                //  a haxor from stopping the link name short with quotes in
                //  fillBuffer().
                //
            case IMAGE:
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                break;

            case IMAGELINK:
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                // Here the link text is the href of the surrounding anchor.
                el = createAnchor(IMAGELINK,text,"","").addContent(el);
                break;

            case IMAGEWIKILINK:
                // Here the link text names the wiki page the image links to.
                String pagelink = m_context.getURL(WikiContext.VIEW,text);
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
                break;

            case EXTERNAL:
                el = createAnchor( EXTERNAL, link, text, section );
                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
                break;

            case INTERWIKI:
                el = createAnchor( INTERWIKI, link, text, section );
                break;

            case ATTACHMENT:
                String attlink = m_context.getURL( WikiContext.ATTACH,
                                                   link );

                String infolink = m_context.getURL( WikiContext.INFO,
                                                    link );

                String imglink = m_context.getURL( WikiContext.NONE,
                                                   "images/attachment_small.png" );

                el = createAnchor( ATTACHMENT, attlink, text, "" );

                if(  m_engine.getAttachmentManager().forceDownload( attlink ) )
                {
                    el.setAttribute("download", "");
                }

                // The attachment anchor is appended to the tree right here
                // (push, then immediately pop); "el" is then reused for the
                // optional info link.
                pushElement(el);
                popElement(el.getName());

                if( m_useAttachmentImage )
                {
                    el = new Element("img").setAttribute("src",imglink);
                    el.setAttribute("border","0");
                    el.setAttribute("alt","(info)");

                    el = new Element("a").setAttribute("href",infolink).addContent(el);
                    el.setAttribute("class","infolink");
                }
                else
                {
                    el = null;
                }
                break;

            default:
                break;
        }

        // NOTE(review): for ATTACHMENT links "el" is the info link at this
        // point (or null), so the extra attributes do not end up on the
        // attachment anchor itself -- confirm that this is intended.
        if( el != null && attributes != null )
        {
            while( attributes.hasNext() )
            {
                Attribute attr = attributes.next();
                if( attr != null )
                {
                    el.setAttribute(attr);
                }
            }
        }

        if( el != null )
        {
            flushPlainText();
            m_currentElement.addContent( el );
        }
        return el;
    }
443
444
445    /**
446     *  Figures out if a link is an off-site link.  This recognizes
447     *  the most common protocols by checking how it starts.
448     *
449     *  @param link The link to check.
450     *  @return true, if this is a link outside of this wiki.
451     *  @since 2.4
452     *  @deprecated - use {@link LinkParsingOperations#isExternalLink(String)} instead.
453     */
454    @Deprecated
455    public static boolean isExternalLink( String link )
456    {
457        return new LinkParsingOperations(null).isExternalLink( link );
458    }
459
460    /**
461     *  Returns true if the link is really command to insert
462     *  a plugin.
463     *  <P>
464     *  Currently we just check if the link starts with "{INSERT",
465     *  or just plain "{" but not "{$".
466     *
467     *  @param link Link text, i.e. the contents of text between [].
468     *  @return True, if this link seems to be a command to insert a plugin here.
469     *  @deprecated Use {@link LinkParsingOperations#isPluginLink(String)} instead,
470     */
471    @Deprecated
472    public static boolean isPluginLink( String link )
473    {
474        return new LinkParsingOperations( null ).isPluginLink( link );
475    }
476
477    /**
478     *  These are all of the HTML 4.01 block-level elements.
479     */
480    private static final String[] BLOCK_ELEMENTS = {
481        "address", "blockquote", "div", "dl", "fieldset", "form",
482        "h1", "h2", "h3", "h4", "h5", "h6",
483        "hr", "noscript", "ol", "p", "pre", "table", "ul"
484    };
485
486    private static boolean isBlockLevel( String name )
487    {
488        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
489    }
490
491    /**
492     *  This method peeks ahead in the stream until EOL and returns the result.
493     *  It will keep the buffers untouched.
494     *
495     *  @return The string from the current position to the end of line.
496     */
497
498    // FIXME: Always returns an empty line, even if the stream is full.
499    private String peekAheadLine()
500        throws IOException
501    {
502        String s = readUntilEOL().toString();
503
504        if( s.length() > PUSHBACK_BUFFER_SIZE )
505        {
506            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
507            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
508        }
509        else
510        {
511            try
512            {
513                pushBack( s );
514            }
515            catch( IOException e )
516            {
517                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
518            }
519        }
520        return s;
521    }
522
    /**
     *  Moves the plain text collected so far from the buffer into the current
     *  element.  Unless raw HTML is allowed, the text is entity-escaped first.
     *  When CamelCase links are enabled, the parser is not in escaping mode and
     *  the text is longer than three characters, bare WikiWords and URIs found
     *  in the text are turned into links on the way.
     *
     *  @return The number of characters that were in the buffer before it was
     *          flushed, i.e. before any escaping.
     */
    private int flushPlainText()
    {
        int numChars = m_plainTextBuf.length();

        if( numChars > 0 )
        {
            String buf;

            if( !m_allowHTML )
            {
                buf = escapeHTMLEntities(m_plainTextBuf.toString());
            }
            else
            {
                buf = m_plainTextBuf.toString();
            }
            //
            //  We must first empty the buffer because the side effect of
            //  calling makeCamelCaseLink() is to call this routine.
            //

            m_plainTextBuf = new StringBuilder(20);

            try
            {
                //
                //  This is the heaviest part of parsing, and therefore we can
                //  do some optimization here.
                //
                //  1) Only when the length of the buffer is big enough, we try to do the match
                //

                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
                {
                    //  Each round consumes the text up to the end of the
                    //  match; "buf" always holds what is still unprocessed.
                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
                    {
                        MatchResult result = m_camelCaseMatcher.getMatch();

                        String firstPart = buf.substring(0,result.beginOffset(0));
                        String prefix = result.group(1);

                        if( prefix == null ) prefix = "";

                        //  Group 2 is the whole matched word or URI; groups 3
                        //  and 4 are the protocol and the rest of the URI.
                        //  "uri" is only used when a protocol was matched.
                        String camelCase = result.group(2);
                        String protocol  = result.group(3);
                        String uri       = protocol+result.group(4);
                        buf              = buf.substring(result.endOffset(0));

                        m_currentElement.addContent( firstPart );

                        //
                        //  Check if the user does not wish to do URL or WikiWord expansion
                        //
                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
                        {
                            if( prefix.endsWith("~") )
                            {
                                //  The tilde itself is only kept for the wysiwyg editor.
                                if( m_wysiwygEditorMode )
                                {
                                    m_currentElement.addContent( "~" );
                                }
                                prefix = prefix.substring(0,prefix.length()-1);
                            }
                            if( camelCase != null )
                            {
                                m_currentElement.addContent( prefix+camelCase );
                            }
                            else if( protocol != null )
                            {
                                m_currentElement.addContent( prefix+uri );
                            }
                            continue;
                        }

                        //
                        //  Fine, then let's check what kind of a link this was
                        //  and emit the proper elements
                        //
                        if( protocol != null )
                        {
                            //  A trailing period or comma is not taken as part
                            //  of the URI; it is given back to the remaining text.
                            char c = uri.charAt(uri.length()-1);
                            if( c == '.' || c == ',' )
                            {
                                uri = uri.substring(0,uri.length()-1);
                                buf = c + buf;
                            }
                            m_currentElement.addContent( prefix );
                            makeDirectURILink( uri );
                        }
                        else
                        {
                            m_currentElement.addContent( prefix );

                            makeCamelCaseLink( camelCase );
                        }
                    }

                    //  Whatever is left after the last match is plain text.
                    m_currentElement.addContent( buf );
                }
                else
                {
                    //
                    //  No camelcase asked for, just add the elements
                    //
                    m_currentElement.addContent( buf );
                }
            }
            catch( IllegalDataException e )
            {
                //
                // Sometimes it's possible that illegal XML chars is added to the data.
                // Here we make sure it does not stop parsing.
                //
                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
            }
        }

        return numChars;
    }
648
649    /**
650     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
651     *  entities that are already escaped).
652     *
653     *  @param buf
654     *  @return An escaped string.
655     */
656    private String escapeHTMLEntities(String buf)
657    {
658        StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
659
660        for( int i = 0; i < buf.length(); i++ )
661        {
662            char ch = buf.charAt(i);
663
664            if( ch == '<' )
665            {
666                tmpBuf.append("&lt;");
667            }
668            else if( ch == '>' )
669            {
670                tmpBuf.append("&gt;");
671            }
672            else if( ch == '\"' )
673            {
674                tmpBuf.append("&quot;");
675            }
676            else if( ch == '&' )
677            {
678                //
679                //  If the following is an XML entity reference (&#.*;) we'll
680                //  leave it as it is; otherwise we'll replace it with an &amp;
681                //
682
683                boolean isEntity = false;
684                StringBuilder entityBuf = new StringBuilder();
685
686                if( i < buf.length() -1 )
687                {
688                    for( int j = i; j < buf.length(); j++ )
689                    {
690                        char ch2 = buf.charAt(j);
691
692                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
693                        {
694                            entityBuf.append(ch2);
695
696                            if( ch2 == ';' )
697                            {
698                                isEntity = true;
699                                break;
700                            }
701                        }
702                        else
703                        {
704                            break;
705                        }
706                    }
707                }
708
709                if( isEntity )
710                {
711                    tmpBuf.append( entityBuf );
712                    i = i + entityBuf.length() - 1;
713                }
714                else
715                {
716                    tmpBuf.append("&amp;");
717                }
718
719            }
720            else
721            {
722                tmpBuf.append( ch );
723            }
724        }
725
726        return tmpBuf.toString();
727    }
728
729    private Element pushElement( Element e )
730    {
731        flushPlainText();
732        m_currentElement.addContent( e );
733        m_currentElement = e;
734
735        return e;
736    }
737
738    private Element addElement( Content e )
739    {
740        if( e != null )
741        {
742            flushPlainText();
743            m_currentElement.addContent( e );
744        }
745        return m_currentElement;
746    }
747
748    /**
749     *  All elements that can be empty by the HTML DTD.
750     */
751    //  Keep sorted.
752    private static final String[] EMPTY_ELEMENTS = {
753        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
754    };
755
756    /**
757     *  Goes through the current element stack and pops all elements until this
758     *  element is found - this essentially "closes" and element.
759     *
760     *  @param s
761     *  @return The new current element, or null, if there was no such element in the entire stack.
762     */
763    private Element popElement( String s )
764    {
765        int flushedBytes = flushPlainText();
766
767        Element currEl = m_currentElement;
768
769        while( currEl.getParentElement() != null )
770        {
771            if( currEl.getName().equals(s) && !currEl.isRootElement() )
772            {
773                m_currentElement = currEl.getParentElement();
774
775                //
776                //  Check if it's okay for this element to be empty.  Then we will
777                //  trick the JDOM generator into not generating an empty element,
778                //  by putting an empty string between the tags.  Yes, it's a kludge
779                //  but what'cha gonna do about it. :-)
780                //
781
782                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
783                {
784                    currEl.addContent("");
785                }
786
787                return m_currentElement;
788            }
789
790            currEl = currEl.getParentElement();
791        }
792
793        return null;
794    }
795
796
797    /**
798     *  Reads the stream until it meets one of the specified
799     *  ending characters, or stream end.  The ending character will be left
800     *  in the stream.
801     */
802    private String readUntil( String endChars )
803        throws IOException
804    {
805        StringBuilder sb = new StringBuilder( 80 );
806        int ch = nextToken();
807
808        while( ch != -1 )
809        {
810            if( ch == '\\' )
811            {
812                ch = nextToken();
813                if( ch == -1 )
814                {
815                    break;
816                }
817            }
818            else
819            {
820                if( endChars.indexOf((char)ch) != -1 )
821                {
822                    pushBack( ch );
823                    break;
824                }
825            }
826            sb.append( (char) ch );
827            ch = nextToken();
828        }
829
830        return sb.toString();
831    }
832
833    /**
834     *  Reads the stream while the characters that have been specified are
835     *  in the stream, returning then the result as a String.
836     */
837    private String readWhile( String endChars )
838        throws IOException
839    {
840        StringBuilder sb = new StringBuilder( 80 );
841        int ch = nextToken();
842
843        while( ch != -1 )
844        {
845            if( endChars.indexOf((char)ch) == -1 )
846            {
847                pushBack( ch );
848                break;
849            }
850
851            sb.append( (char) ch );
852            ch = nextToken();
853        }
854
855        return sb.toString();
856    }
857
858    private JSPWikiMarkupParser m_cleanTranslator;
859
860    /**
861     *  Does a lazy init.  Otherwise, we would get into a situation
862     *  where HTMLRenderer would try and boot a TranslatorReader before
863     *  the TranslatorReader it is contained by is up.
864     */
865    private JSPWikiMarkupParser getCleanTranslator()
866    {
867        if( m_cleanTranslator == null )
868        {
869            WikiContext dummyContext = new WikiContext( m_engine,
870                                                        m_context.getHttpRequest(),
871                                                        m_context.getPage() );
872            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
873
874            m_cleanTranslator.m_allowHTML = true;
875        }
876
877        return m_cleanTranslator;
878    }
879    /**
880     *  Modifies the "hd" parameter to contain proper values.  Because
881     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
882     *  % after url encoding with '_'.
883     *  <p>
884     *  Counts also duplicate headings (= headings with similar name), and
885     *  attaches a counter.
886     */
887    private String makeHeadingAnchor( String baseName, String title, Heading hd )
888    {
889        hd.m_titleText = title;
890        title = MarkupParser.wikifyLink( title );
891
892        hd.m_titleSection = m_engine.encodeName(title);
893
894        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) )
895        {
896            Integer count = m_titleSectionCounter.get( hd.m_titleSection );
897            count = count + 1;
898            m_titleSectionCounter.put( hd.m_titleSection, count );
899            hd.m_titleSection += "-" + count;
900        }
901        else
902        {
903            m_titleSectionCounter.put( hd.m_titleSection, 1 );
904        }
905
906        hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+
907                           "-"+hd.m_titleSection;
908        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
909        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
910
911        return hd.m_titleAnchor;
912    }
913
914    private String makeSectionTitle( String title )
915    {
916        title = title.trim();
917        String outTitle;
918
919        try
920        {
921            JSPWikiMarkupParser dtr = getCleanTranslator();
922            dtr.setInputReader( new StringReader(title) );
923
924            CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse());
925
926            outTitle = ctt.getString();
927        }
928        catch( IOException e )
929        {
930            log.fatal("CleanTranslator not working", e);
931            throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e);
932        }
933
934        return outTitle;
935    }
936
937    /**
938     *  Returns XHTML for the heading.
939     *
940     *  @param level The level of the heading.  @see Heading
941     *  @param title the title for the heading
942     *  @param hd a List to which heading should be added
943     *  @return An Element containing the heading
944     */
945    public Element makeHeading( int level, String title, Heading hd )
946    {
947        Element el = null;
948
949        String pageName = m_context.getPage().getName();
950
951        String outTitle = makeSectionTitle( title );
952
953        hd.m_level = level;
954
955        switch( level )
956        {
957          case Heading.HEADING_SMALL:
958            el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
959            break;
960
961          case Heading.HEADING_MEDIUM:
962            el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
963            break;
964
965          case Heading.HEADING_LARGE:
966            el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
967            break;
968
969          default:
970            throw new InternalWikiException("Illegal heading type "+level);
971        }
972
973
974        return el;
975    }
976
977    /**
978     *  When given a link to a WikiName, we just return
979     *  a proper HTML link for it.  The local link mutator
980     *  chain is also called.
981     */
982    private Element makeCamelCaseLink( String wikiname )
983    {
984        String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
985
986        callMutatorChain( m_localLinkMutatorChain, wikiname );
987
988        if( matchedLink != null ) {
989            makeLink( READ, matchedLink, wikiname, null, null );
990        } else {
991            makeLink( EDIT, wikiname, wikiname, null, null );
992        }
993
994        return m_currentElement;
995    }
996
997    /** Holds the image URL for the duration of this parser */
998    private String m_outlinkImageURL = null;
999
1000    /**
1001     *  Returns an element for the external link image (out.png).  However,
1002     *  this method caches the URL for the lifetime of this MarkupParser,
1003     *  because it's commonly used, and we'll end up with possibly hundreds
1004     *  our thousands of references to it...  It's a lot faster, too.
1005     *
1006     *  @return  An element containing the HTML for the outlink image.
1007     */
1008    private Element outlinkImage()
1009    {
1010        Element el = null;
1011
1012        if( m_useOutlinkImage )
1013        {
1014            if( m_outlinkImageURL == null )
1015            {
1016                m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE );
1017            }
1018
1019            el = new Element( "img" ).setAttribute( "class", OUTLINK );
1020            el.setAttribute( "src", m_outlinkImageURL );
1021            el.setAttribute( "alt","" );
1022        }
1023
1024        return el;
1025    }
1026
1027    /**
1028     *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
1029     *  because of the way that flushPlainText() works, it already encodes
1030     *  all of the XML entities.  But so does WikiContext.getURL(), so we
1031     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
1032     *  <p>
1033     *  What a crappy problem.
1034     *
1035     * @param url
1036     * @return An anchor Element containing the link.
1037     */
1038    private Element makeDirectURILink( String url )
1039    {
1040        Element result;
1041        String last = null;
1042
1043        if( url.endsWith(",") || url.endsWith(".") )
1044        {
1045            last = url.substring( url.length()-1 );
1046            url  = url.substring( 0, url.length()-1 );
1047        }
1048
1049        callMutatorChain( m_externalLinkMutatorChain, url );
1050
1051        if( m_linkParsingOperations.isImageLink( url ) )
1052        {
1053            result = handleImageLink( StringUtils.replace(url,"&amp;","&"), url, false );
1054        }
1055        else
1056        {
1057            result = makeLink( EXTERNAL, StringUtils.replace(url,"&amp;","&"), url, null, null );
1058            addElement( outlinkImage() );
1059        }
1060
1061        if( last != null )
1062        {
1063            m_plainTextBuf.append(last);
1064        }
1065
1066        return result;
1067    }
1068
1069    /**
1070     *  Image links are handled differently:
1071     *  1. If the text is a WikiName of an existing page,
1072     *     it gets linked.
1073     *  2. If the text is an external link, then it is inlined.
1074     *  3. Otherwise it becomes an ALT text.
1075     *
1076     *  @param reallink The link to the image.
1077     *  @param link     Link text portion, may be a link to somewhere else.
1078     *  @param hasLinkText If true, then the defined link had a link text available.
1079     *                  This means that the link text may be a link to a wiki page,
1080     *                  or an external resource.
1081     */
1082
1083    // FIXME: isExternalLink() is called twice.
1084    private Element handleImageLink( String reallink, String link, boolean hasLinkText )
1085    {
1086        String possiblePage = MarkupParser.cleanLink( link );
1087
1088        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText )
1089        {
1090            return makeLink( IMAGELINK, reallink, link, null, null );
1091        }
1092        else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText )
1093        {
1094            // System.out.println("Orig="+link+", Matched: "+matchedLink);
1095            callMutatorChain( m_localLinkMutatorChain, possiblePage );
1096
1097            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
1098        }
1099        else
1100        {
1101            return makeLink( IMAGE, reallink, link, null, null );
1102        }
1103    }
1104
1105    private Element handleAccessRule( String ruleLine )
1106    {
1107        if( m_wysiwygEditorMode )
1108        {
1109            m_currentElement.addContent( "[" + ruleLine + "]" );
1110        }
1111
1112        if( !m_parseAccessRules ) return m_currentElement;
1113        Acl acl;
1114        WikiPage          page = m_context.getRealPage();
1115        // UserDatabase      db = m_context.getEngine().getUserDatabase();
1116
1117        if( ruleLine.startsWith( "{" ) )
1118            ruleLine = ruleLine.substring( 1 );
1119        if( ruleLine.endsWith( "}" ) )
1120            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1121
1122        if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine);
1123
1124        try
1125        {
1126            acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1127
1128            page.setAcl( acl );
1129
1130            if( log.isDebugEnabled() ) log.debug( acl.toString() );
1131        }
1132        catch( WikiSecurityException wse )
1133        {
1134            return makeError( wse.getMessage() );
1135        }
1136
1137        return m_currentElement;
1138    }
1139
1140    /**
1141     *  Handles metadata setting [{SET foo=bar}]
1142     */
1143    private Element handleMetadata( String link )
1144    {
1145        if( m_wysiwygEditorMode )
1146        {
1147            m_currentElement.addContent( "[" + link + "]" );
1148        }
1149
1150        try
1151        {
1152            String args = link.substring( link.indexOf(' '), link.length()-1 );
1153
1154            String name = args.substring( 0, args.indexOf('=') );
1155            String val  = args.substring( args.indexOf('=')+1, args.length() );
1156
1157            name = name.trim();
1158            val  = val.trim();
1159
1160            if( val.startsWith("'") ) val = val.substring( 1 );
1161            if( val.endsWith("'") )   val = val.substring( 0, val.length()-1 );
1162
1163            // log.debug("SET name='"+name+"', value='"+val+"'.");
1164
1165            if( name.length() > 0 && val.length() > 0 )
1166            {
1167                val = m_engine.getVariableManager().expandVariables( m_context,
1168                                                                     val );
1169
1170                m_context.getPage().setAttribute( name, val );
1171            }
1172        }
1173        catch( Exception e )
1174        {
1175            ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1176            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1177        }
1178
1179        return m_currentElement;
1180    }
1181
1182    /**
1183     *  Emits a processing instruction that will disable markup escaping. This is
1184     *  very useful if you want to emit HTML directly into the stream.
1185     *
1186     */
1187    private void disableOutputEscaping()
1188    {
1189        addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") );
1190    }
1191
1192    /**
1193     *  Gobbles up all hyperlinks that are encased in square brackets.
1194     */
1195    private Element handleHyperlinks( String linktext, int pos )
1196    {
1197        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1198
1199        StringBuilder sb = new StringBuilder(linktext.length()+80);
1200
1201        if( m_linkParsingOperations.isAccessRule( linktext ) )
1202        {
1203            return handleAccessRule( linktext );
1204        }
1205
1206        if( m_linkParsingOperations.isMetadata( linktext ) )
1207        {
1208            return handleMetadata( linktext );
1209        }
1210
1211        if( m_linkParsingOperations.isPluginLink( linktext ) )
1212        {
1213            try
1214            {
1215                PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1216                //
1217                //  This might sometimes fail, especially if there is something which looks
1218                //  like a plugin invocation but is really not.
1219                //
1220                if( pluginContent != null )
1221                {
1222                    addElement( pluginContent );
1223
1224                    pluginContent.executeParse( m_context );
1225                }
1226            }
1227            catch( PluginException e )
1228            {
1229                log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
1230                //log.info( "Root cause:",e.getRootThrowable() );
1231                if( !m_wysiwygEditorMode )
1232                {
1233                    ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE );
1234                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
1235                                                                        m_context.getRealPage().getWiki(),
1236                                                                        m_context.getRealPage().getName(),
1237                                                                        e.getMessage() ) ) );
1238                }
1239            }
1240
1241            return m_currentElement;
1242        }
1243
1244        try
1245        {
1246            LinkParser.Link link = m_linkParser.parse(linktext);
1247            linktext       = link.getText();
1248            String linkref = link.getReference();
1249
1250            //
1251            //  Yes, we now have the components separated.
1252            //  linktext = the text the link should have
1253            //  linkref  = the url or page name.
1254            //
1255            //  In many cases these are the same.  [linktext|linkref].
1256            //
1257            if( m_linkParsingOperations.isVariableLink( linktext ) )
1258            {
1259                Content el = new VariableContent(linktext);
1260
1261                addElement( el );
1262            }
1263            else if( m_linkParsingOperations.isExternalLink( linkref ) )
1264            {
1265                // It's an external link, out of this Wiki
1266
1267                callMutatorChain( m_externalLinkMutatorChain, linkref );
1268
1269                if( m_linkParsingOperations.isImageLink( linkref ) )
1270                {
1271                    handleImageLink( linkref, linktext, link.hasReference() );
1272                }
1273                else
1274                {
1275                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1276                    addElement( outlinkImage() );
1277                }
1278            }
1279            else if( link.isInterwikiLink() )
1280            {
1281                // It's an interwiki link
1282                // InterWiki links also get added to external link chain
1283                // after the links have been resolved.
1284
1285                // FIXME: There is an interesting issue here:  We probably should
1286                //        URLEncode the wikiPage, but we can't since some of the
1287                //        Wikis use slashes (/), which won't survive URLEncoding.
1288                //        Besides, we don't know which character set the other Wiki
1289                //        is using, so you'll have to write the entire name as it appears
1290                //        in the URL.  Bugger.
1291
1292                String extWiki  = link.getExternalWiki();
1293                String wikiPage = link.getExternalWikiPage();
1294
1295                if( m_wysiwygEditorMode )
1296                {
1297                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1298                }
1299                else
1300                {
1301                    String urlReference = m_engine.getInterWikiURL( extWiki );
1302
1303                    if( urlReference != null )
1304                    {
1305                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1306                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1307
1308                        if( m_linkParsingOperations.isImageLink(urlReference) )
1309                        {
1310                            handleImageLink( urlReference, linktext, link.hasReference() );
1311                        }
1312                        else
1313                        {
1314                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1315                        }
1316
1317                        if( m_linkParsingOperations.isExternalLink(urlReference) )
1318                        {
1319                            addElement( outlinkImage() );
1320                        }
1321                    }
1322                    else
1323                    {
1324                        Object[] args = { escapeHTMLEntities(extWiki) };
1325
1326                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1327                    }
1328                }
1329            }
1330            else if( linkref.startsWith("#") )
1331            {
1332                // It defines a local footnote
1333                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1334            }
1335            else if( TextUtil.isNumber( linkref ) )
1336            {
1337                // It defines a reference to a local footnote
1338                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1339            }
1340            else
1341            {
1342                int hashMark = -1;
1343
1344                //
1345                //  Internal wiki link, but is it an attachment link?
1346                //
1347                String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref );
1348                if( attachment != null )
1349                {
1350                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1351
1352                    if( m_linkParsingOperations.isImageLink( linkref ) )
1353                    {
1354                        attachment = m_context.getURL( WikiContext.ATTACH, attachment );
1355                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1356                    }
1357                    else
1358                    {
1359                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1360                    }
1361                }
1362                else if( (hashMark = linkref.indexOf('#')) != -1 )
1363                {
1364                    // It's an internal Wiki link, but to a named section
1365
1366                    String namedSection = linkref.substring( hashMark+1 );
1367                    linkref = linkref.substring( 0, hashMark );
1368
1369                    linkref = MarkupParser.cleanLink( linkref );
1370
1371                    callMutatorChain( m_localLinkMutatorChain, linkref );
1372
1373                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1374                    if( matchedLink != null ) {
1375                        String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection));
1376                        sectref = sectref.replace('%', '_');
1377                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1378                    } else {
1379                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1380                    }
1381                }
1382                else
1383                {
1384                    // It's an internal Wiki link
1385                    linkref = MarkupParser.cleanLink( linkref );
1386
1387                    callMutatorChain( m_localLinkMutatorChain, linkref );
1388
1389                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1390                    if( matchedLink != null ) {
1391                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1392                    } else {
1393                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1394                    }
1395                }
1396            }
1397        }
1398        catch( ParseException e )
1399        {
1400            log.info("Parser failure: ",e);
1401            Object[] args = { e.getMessage() };
1402            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1403        }
1404
1405        return m_currentElement;
1406    }
1407
1408    /**
1409     *  Pushes back any string that has been read.  It will obviously
1410     *  be pushed back in a reverse order.
1411     *
1412     *  @since 2.1.77
1413     */
1414    private void pushBack( String s )
1415        throws IOException
1416    {
1417        for( int i = s.length()-1; i >= 0; i-- )
1418        {
1419            pushBack( s.charAt(i) );
1420        }
1421    }
1422
1423    private Element handleBackslash()
1424        throws IOException
1425    {
1426        int ch = nextToken();
1427
1428        if( ch == '\\' )
1429        {
1430            int ch2 = nextToken();
1431
1432            if( ch2 == '\\' )
1433            {
1434                pushElement( new Element("br").setAttribute("clear","all"));
1435                return popElement("br");
1436            }
1437
1438            pushBack( ch2 );
1439
1440            pushElement( new Element("br") );
1441            return popElement("br");
1442        }
1443
1444        pushBack( ch );
1445
1446        return null;
1447    }
1448
1449    private Element handleUnderscore()
1450        throws IOException
1451    {
1452        int ch = nextToken();
1453        Element el = null;
1454
1455        if( ch == '_' )
1456        {
1457            if( m_isbold )
1458            {
1459                el = popElement("b");
1460            }
1461            else
1462            {
1463                el = pushElement( new Element("b") );
1464            }
1465            m_isbold = !m_isbold;
1466        }
1467        else
1468        {
1469            pushBack( ch );
1470        }
1471
1472        return el;
1473    }
1474
1475
1476    /**
1477     *  For example: italics.
1478     */
1479    private Element handleApostrophe()
1480        throws IOException
1481    {
1482        int ch = nextToken();
1483        Element el = null;
1484
1485        if( ch == '\'' )
1486        {
1487            if( m_isitalic )
1488            {
1489                el = popElement("i");
1490            }
1491            else
1492            {
1493                el = pushElement( new Element("i") );
1494            }
1495            m_isitalic = !m_isitalic;
1496        }
1497        else
1498        {
1499            pushBack( ch );
1500        }
1501
1502        return el;
1503    }
1504
1505    private Element handleOpenbrace( boolean isBlock )
1506        throws IOException
1507    {
1508        int ch = nextToken();
1509
1510        if( ch == '{' )
1511        {
1512            int ch2 = nextToken();
1513
1514            if( ch2 == '{' )
1515            {
1516                m_isPre = true;
1517                m_isEscaping = true;
1518                m_isPreBlock = isBlock;
1519
1520                if( isBlock )
1521                {
1522                    startBlockLevel();
1523                    return pushElement( new Element("pre") );
1524                }
1525
1526                return pushElement( new Element("span").setAttribute("class","inline-code") );
1527            }
1528
1529            pushBack( ch2 );
1530
1531            return pushElement( new Element("tt") );
1532        }
1533
1534        pushBack( ch );
1535
1536        return null;
1537    }
1538
1539    /**
1540     *  Handles both }} and }}}
1541     */
1542    private Element handleClosebrace()
1543        throws IOException
1544    {
1545        int ch2 = nextToken();
1546
1547        if( ch2 == '}' )
1548        {
1549            int ch3 = nextToken();
1550
1551            if( ch3 == '}' )
1552            {
1553                if( m_isPre )
1554                {
1555                    if( m_isPreBlock )
1556                    {
1557                        popElement( "pre" );
1558                    }
1559                    else
1560                    {
1561                        popElement( "span" );
1562                    }
1563
1564                    m_isPre = false;
1565                    m_isEscaping = false;
1566                    return m_currentElement;
1567                }
1568
1569                m_plainTextBuf.append("}}}");
1570                return m_currentElement;
1571            }
1572
1573            pushBack( ch3 );
1574
1575            if( !m_isEscaping )
1576            {
1577                return popElement("tt");
1578            }
1579        }
1580
1581        pushBack( ch2 );
1582
1583        return null;
1584    }
1585
1586    private Element handleDash()
1587        throws IOException
1588    {
1589        int ch = nextToken();
1590
1591        if( ch == '-' )
1592        {
1593            int ch2 = nextToken();
1594
1595            if( ch2 == '-' )
1596            {
1597                int ch3 = nextToken();
1598
1599                if( ch3 == '-' )
1600                {
1601                    // Empty away all the rest of the dashes.
1602                    // Do not forget to return the first non-match back.
1603                    do
1604                    {
1605                        ch = nextToken();
1606                    }
1607                    while ( ch == '-' );
1608
1609                    pushBack(ch);
1610                    startBlockLevel();
1611                    pushElement( new Element("hr") );
1612                    return popElement( "hr" );
1613                }
1614
1615                pushBack( ch3 );
1616            }
1617            pushBack( ch2 );
1618        }
1619
1620        pushBack( ch );
1621
1622        return null;
1623    }
1624
1625    private Element handleHeading()
1626        throws IOException
1627    {
1628        Element el = null;
1629
1630        int ch  = nextToken();
1631
1632        Heading hd = new Heading();
1633
1634        if( ch == '!' )
1635        {
1636            int ch2 = nextToken();
1637
1638            if( ch2 == '!' )
1639            {
1640                String title = peekAheadLine();
1641
1642                el = makeHeading( Heading.HEADING_LARGE, title, hd);
1643            }
1644            else
1645            {
1646                pushBack( ch2 );
1647                String title = peekAheadLine();
1648                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1649            }
1650        }
1651        else
1652        {
1653            pushBack( ch );
1654            String title = peekAheadLine();
1655            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1656        }
1657
1658        callHeadingListenerChain( hd );
1659
1660        m_lastHeading = hd;
1661
1662        if( el != null ) pushElement(el);
1663
1664        return el;
1665    }
1666
1667    /**
1668     *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1669     *  EOL from the stream.
1670     */
1671    private StringBuilder readUntilEOL()
1672        throws IOException
1673    {
1674        int ch;
1675        StringBuilder buf = new StringBuilder( 256 );
1676
1677        while( true )
1678        {
1679            ch = nextToken();
1680
1681            if( ch == -1 )
1682                break;
1683
1684            buf.append( (char) ch );
1685
1686            if( ch == '\n' )
1687                break;
1688        }
1689        return buf;
1690    }
1691
1692    /** Controls whether italic is restarted after a paragraph shift */
1693
1694    private boolean m_restartitalic = false;
1695    private boolean m_restartbold   = false;
1696
1697    private boolean m_newLine;
1698
1699    /**
1700     *  Starts a block level element, therefore closing
1701     *  a potential open paragraph tag.
1702     */
1703    private void startBlockLevel()
1704    {
1705        // These may not continue over block level limits in XHTML
1706
1707        popElement("i");
1708        popElement("b");
1709        popElement("tt");
1710
1711        if( m_isOpenParagraph )
1712        {
1713            m_isOpenParagraph = false;
1714            popElement("p");
1715            m_plainTextBuf.append("\n"); // Just small beautification
1716        }
1717
1718        m_restartitalic = m_isitalic;
1719        m_restartbold   = m_isbold;
1720
1721        m_isitalic = false;
1722        m_isbold   = false;
1723    }
1724
1725    private static String getListType( char c )
1726    {
1727        if( c == '*' )
1728        {
1729            return "ul";
1730        }
1731        else if( c == '#' )
1732        {
1733            return "ol";
1734        }
1735        throw new InternalWikiException("Parser got faulty list type: "+c);
1736    }
1737    /**
1738     *  Like original handleOrderedList() and handleUnorderedList()
1739     *  however handles both ordered ('#') and unordered ('*') mixed together.
1740     */
1741
1742    // FIXME: Refactor this; it's a bit messy.
1743
1744    private Element handleGeneralList()
1745        throws IOException
1746    {
1747         startBlockLevel();
1748
1749         String strBullets = readWhile( "*#" );
1750         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1751         int numBullets = strBullets.length();
1752
1753         // override the beginning portion of bullet pattern to be like the previous
1754         // to simulate PHPWiki style lists
1755
1756         if(m_allowPHPWikiStyleLists)
1757         {
1758             // only substitute if different
1759             if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1760                   (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
1761             {
1762                 if(numBullets <= m_genlistlevel)
1763                 {
1764                     // Substitute all but the last character (keep the expressed bullet preference)
1765                     strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
1766                                   + strBullets.substring(numBullets-1, numBullets);
1767                 }
1768                 else
1769                 {
1770                     strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
1771                 }
1772             }
1773         }
1774
1775         //
1776         //  Check if this is still of the same type
1777         //
1778         if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1779            (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
1780         {
1781             if( numBullets > m_genlistlevel )
1782             {
1783                 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );
1784
1785                 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
1786                 {
1787                     // bullets are growing, get from new bullet list
1788                     pushElement( new Element("li") );
1789                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
1790                 }
1791             }
1792             else if( numBullets < m_genlistlevel )
1793             {
1794                 //  Close the previous list item.
1795                 // buf.append( m_renderer.closeListItem() );
1796                 popElement( "li" );
1797
1798                 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
1799                 {
1800                     // bullets are shrinking, get from old bullet list
1801
1802                     popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1803                     if( m_genlistlevel > 0 )
1804                     {
1805                         popElement( "li" );
1806                     }
1807
1808                 }
1809             }
1810             else
1811             {
1812                 if( m_genlistlevel > 0 )
1813                 {
1814                     popElement( "li" );
1815                 }
1816             }
1817         }
1818         else
1819         {
1820             //
1821             //  The pattern has changed, unwind and restart
1822             //
1823             int  numEqualBullets;
1824             int  numCheckBullets;
1825
1826             // find out how much is the same
1827             numEqualBullets = 0;
1828             numCheckBullets = Math.min(numBullets,m_genlistlevel);
1829
1830             while( numEqualBullets < numCheckBullets )
1831             {
1832                 // if the bullets are equal so far, keep going
1833                 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
1834                     numEqualBullets++;
1835                 // otherwise giveup, we have found how many are equal
1836                 else
1837                     break;
1838             }
1839
1840             //unwind
1841             for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
1842             {
1843                 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
1844                 if( m_genlistlevel > numBullets )
1845                 {
1846                     popElement("li");
1847                 }
1848             }
1849
1850             //rewind
1851
1852             pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
1853             for(int i = numEqualBullets; i < numBullets; i++)
1854             {
1855                 pushElement( new Element("li") );
1856                 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
1857             }
1858             m_genlistlevel = numBullets;
1859         }
1860
1861         //
1862         //  Push a new list item, and eat away any extra whitespace
1863         //
1864         pushElement( new Element("li") );
1865         readWhile(" ");
1866
1867         // work done, remember the new bullet list (in place of old one)
1868         m_genlistBulletBuffer.setLength(0);
1869         m_genlistBulletBuffer.append(strBullets);
1870
1871         return m_currentElement;
1872    }
1873
1874    private Element unwindGeneralList()
1875    {
1876        //unwind
1877        for( ; m_genlistlevel > 0; m_genlistlevel-- )
1878        {
1879            popElement( "li" );
1880            popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1881        }
1882
1883        m_genlistBulletBuffer.setLength(0);
1884
1885        return null;
1886    }
1887
1888
1889    private Element handleDefinitionList()
1890        throws IOException
1891    {
1892        if( !m_isdefinition )
1893        {
1894            m_isdefinition = true;
1895
1896            startBlockLevel();
1897
1898            pushElement( new Element("dl") );
1899            return pushElement( new Element("dt") );
1900        }
1901
1902        return null;
1903    }
1904
1905    private Element handleOpenbracket()
1906        throws IOException
1907    {
1908        StringBuilder sb = new StringBuilder(40);
1909        int pos = getPosition();
1910        int ch = nextToken();
1911        boolean isPlugin = false;
1912
1913        if( ch == '[' )
1914        {
1915            if( m_wysiwygEditorMode )
1916            {
1917                sb.append( '[' );
1918            }
1919
1920            sb.append( (char)ch );
1921
1922            while( (ch = nextToken()) == '[' )
1923            {
1924                sb.append( (char)ch );
1925            }
1926        }
1927
1928
1929        if( ch == '{' )
1930        {
1931            isPlugin = true;
1932        }
1933
1934        pushBack( ch );
1935
1936        if( sb.length() > 0 )
1937        {
1938            m_plainTextBuf.append( sb );
1939            return m_currentElement;
1940        }
1941
1942        //
1943        //  Find end of hyperlink
1944        //
1945
1946        ch = nextToken();
1947        int nesting = 1;    // Check for nested plugins
1948
1949        while( ch != -1 )
1950        {
1951            int ch2 = nextToken(); pushBack(ch2);
1952
1953            if( isPlugin )
1954            {
1955                if( ch == '[' && ch2 == '{' )
1956                {
1957                    nesting++;
1958                }
1959                else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
1960                {
1961                    break;
1962                }
1963                else if( ch == '}' && ch2 == ']' )
1964                {
1965                    // NB: This will be decremented once at the end
1966                    nesting--;
1967                }
1968            }
1969            else
1970            {
1971                if( ch == ']' )
1972                {
1973                    break;
1974                }
1975            }
1976
1977            sb.append( (char) ch );
1978
1979            ch = nextToken();
1980        }
1981
1982        //
1983        //  If the link is never finished, do some tricks to display the rest of the line
1984        //  unchanged.
1985        //
1986        if( ch == -1 )
1987        {
1988            log.debug("Warning: unterminated link detected!");
1989            m_isEscaping = true;
1990            m_plainTextBuf.append( sb );
1991            flushPlainText();
1992            m_isEscaping = false;
1993            return m_currentElement;
1994        }
1995
1996        return handleHyperlinks( sb.toString(), pos );
1997    }
1998
1999    /**
2000     *  Reads the stream until the current brace is closed or stream end.
2001     */
2002    private String readBraceContent( char opening, char closing )
2003        throws IOException
2004    {
2005        StringBuilder sb = new StringBuilder(40);
2006        int braceLevel = 1;
2007        int ch;
2008        while(( ch = nextToken() ) != -1 )
2009        {
2010            if( ch == '\\' )
2011            {
2012                continue;
2013            }
2014            else if ( ch == opening )
2015            {
2016                braceLevel++;
2017            }
2018            else if ( ch == closing )
2019            {
2020                braceLevel--;
2021                if (braceLevel==0)
2022                {
2023                  break;
2024                }
2025            }
2026            sb.append( (char)ch );
2027        }
2028        return sb.toString();
2029    }
2030
2031
2032    /**
2033     *  Handles constructs of type %%(style) and %%class
2034     * @param newLine
2035     * @return An Element containing the div or span, depending on the situation.
2036     * @throws IOException
2037     */
2038    private Element handleDiv( boolean newLine )
2039        throws IOException
2040    {
2041        int ch = nextToken();
2042        Element el = null;
2043
2044        if( ch == '%' )
2045        {
2046            String style = null;
2047            String clazz = null;
2048
2049            ch = nextToken();
2050
2051            //
2052            //  Style or class?
2053            //
2054            if( ch == '(' )
2055            {
2056                style = readBraceContent('(',')');
2057            }
2058            else if( Character.isLetter( (char) ch ) )
2059            {
2060                pushBack( ch );
2061                clazz = readUntil( "( \t\n\r" );
2062                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
2063                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
2064                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
2065                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
2066
2067                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
2068                //(2) remove any invalid character
2069                if( clazz != null){
2070
2071                    clazz = clazz.replace('.', ' ')
2072                                 .replaceAll("[^\\s-_\\w\\x200-\\x377]+","");
2073
2074                }
2075                ch = nextToken();
2076
2077                //check for %%class1.class2( style information )
2078                if( ch == '(' )
2079                {
2080                    style = readBraceContent('(',')');
2081                }
2082                //
2083                //  Pop out only spaces, so that the upcoming EOL check does not check the
2084                //  next line.
2085                //
2086                else if( ch == '\n' || ch == '\r' )
2087                {
2088                    pushBack(ch);
2089                }
2090            }
2091            else
2092            {
2093                //
2094                // Anything else stops.
2095                //
2096
2097                pushBack(ch);
2098
2099                try
2100                {
2101                    Boolean isSpan = m_styleStack.pop();
2102
2103                    if( isSpan == null )
2104                    {
2105                        // Fail quietly
2106                    }
2107                    else if( isSpan.booleanValue() )
2108                    {
2109                        el = popElement( "span" );
2110                    }
2111                    else
2112                    {
2113                        el = popElement( "div" );
2114                    }
2115                }
2116                catch( EmptyStackException e )
2117                {
2118                    log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
2119                    return m_currentElement;
2120                }
2121
2122                return el;
2123            }
2124
2125            //
2126            //  Check if there is an attempt to do something nasty
2127            //
2128
2129            try
2130            {
2131                style = StringEscapeUtils.unescapeHtml(style);
2132                if( style != null && style.indexOf("javascript:") != -1 )
2133                {
2134                    log.debug("Attempt to output javascript within CSS:"+style);
2135                    ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2136                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
2137                }
2138            }
2139            catch( NumberFormatException e )
2140            {
2141                //
2142                //  If there are unknown entities, we don't want the parser to stop.
2143                //
2144                ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2145                String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
2146                return addElement( makeError( msg ) );
2147            }
2148
2149            //
2150            //  Decide if we should open a div or a span?
2151            //
2152            String eol = peekAheadLine();
2153
2154            if( eol.trim().length() > 0 )
2155            {
2156                // There is stuff after the class
2157
2158                el = new Element("span");
2159
2160                m_styleStack.push( Boolean.TRUE );
2161            }
2162            else
2163            {
2164                startBlockLevel();
2165                el = new Element("div");
2166                m_styleStack.push( Boolean.FALSE );
2167            }
2168
2169            if( style != null ) el.setAttribute("style", style);
2170            if( clazz != null ) el.setAttribute("class", clazz);
2171            el = pushElement( el );
2172
2173            return el;
2174        }
2175
2176        pushBack(ch);
2177
2178        return el;
2179    }
2180
2181    private Element handleSlash( boolean newLine )
2182        throws IOException
2183    {
2184        int ch = nextToken();
2185
2186        pushBack(ch);
2187        if( ch == '%' && !m_styleStack.isEmpty() )
2188        {
2189            return handleDiv( newLine );
2190        }
2191
2192        return null;
2193    }
2194
2195    private Element handleBar( boolean newLine )
2196        throws IOException
2197    {
2198        Element el = null;
2199
2200        if( !m_istable && !newLine )
2201        {
2202            return null;
2203        }
2204
2205        //
2206        //  If the bar is in the first column, we will either start
2207        //  a new table or continue the old one.
2208        //
2209
2210        if( newLine )
2211        {
2212            if( !m_istable )
2213            {
2214                startBlockLevel();
2215                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2216                m_istable = true;
2217                m_rowNum = 0;
2218            }
2219
2220            m_rowNum++;
2221            Element tr = ( m_rowNum % 2 != 0 )
2222                       ? new Element("tr").setAttribute("class", "odd")
2223                       : new Element("tr");
2224            el = pushElement( tr );
2225        }
2226
2227        //
2228        //  Check out which table cell element to start;
2229        //  a header element (th) or a regular element (td).
2230        //
2231        int ch = nextToken();
2232
2233        if( ch == '|' )
2234        {
2235            if( !newLine )
2236            {
2237                el = popElement("th");
2238                if( el == null ) popElement("td");
2239            }
2240            el = pushElement( new Element("th") );
2241        }
2242        else
2243        {
2244            if( !newLine )
2245            {
2246                el = popElement("td");
2247                if( el == null ) popElement("th");
2248            }
2249
2250            el = pushElement( new Element("td") );
2251
2252            pushBack( ch );
2253        }
2254
2255        return el;
2256    }
2257
2258    /**
2259     *  Generic escape of next character or entity.
2260     */
2261    private Element handleTilde()
2262        throws IOException
2263    {
2264        int ch = nextToken();
2265
2266        if( ch == ' ' )
2267        {
2268            if( m_wysiwygEditorMode )
2269            {
2270                m_plainTextBuf.append( "~ " );
2271            }
2272            return m_currentElement;
2273        }
2274
2275        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2276            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2277            ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2278        {
2279            if( m_wysiwygEditorMode )
2280            {
2281                m_plainTextBuf.append( '~' );
2282            }
2283
2284            m_plainTextBuf.append( (char)ch );
2285            m_plainTextBuf.append(readWhile( ""+(char)ch ));
2286            return m_currentElement;
2287        }
2288
2289        // No escape.
2290        pushBack( ch );
2291
2292        return null;
2293    }
2294
2295    private void fillBuffer( Element startElement )
2296        throws IOException
2297    {
2298        m_currentElement = startElement;
2299
2300        boolean quitReading = false;
2301        m_newLine = true;
2302        disableOutputEscaping();
2303
2304        while(!quitReading)
2305        {
2306            int ch = nextToken();
2307
2308            if( ch == -1 ) break;
2309
2310            //
2311            //  Check if we're actually ending the preformatted mode.
2312            //  We still must do an entity transformation here.
2313            //
2314            if( m_isEscaping )
2315            {
2316                if( ch == '}' )
2317                {
2318                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2319                }
2320                else if( ch == -1 )
2321                {
2322                    quitReading = true;
2323                }
2324                else if( ch == '\r' )
2325                {
2326                    // DOS line feeds we ignore.
2327                }
2328                else if( ch == '<' )
2329                {
2330                    m_plainTextBuf.append( "&lt;" );
2331                }
2332                else if( ch == '>' )
2333                {
2334                    m_plainTextBuf.append( "&gt;" );
2335                }
2336                else if( ch == '&' )
2337                {
2338                    m_plainTextBuf.append( "&amp;" );
2339                }
2340                else if( ch == '~' )
2341                {
2342                    String braces = readWhile("}");
2343                    if( braces.length() >= 3 )
2344                    {
2345                        m_plainTextBuf.append("}}}");
2346
2347                        braces = braces.substring(3);
2348                    }
2349                    else
2350                    {
2351                        m_plainTextBuf.append( (char) ch );
2352                    }
2353
2354                    for( int i = braces.length()-1; i >= 0; i-- )
2355                    {
2356                        pushBack(braces.charAt(i));
2357                    }
2358                }
2359                else
2360                {
2361                    m_plainTextBuf.append( (char) ch );
2362                }
2363
2364                continue;
2365            }
2366
2367            //
2368            //  An empty line stops a list
2369            //
2370            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2371            {
2372                m_plainTextBuf.append(unwindGeneralList());
2373            }
2374
2375            if( m_newLine && ch != '|' && m_istable )
2376            {
2377                popElement("table");
2378                m_istable = false;
2379            }
2380
2381            int skip = IGNORE;
2382
2383            //
2384            //  Do the actual parsing and catch any errors.
2385            //
2386            try
2387            {
2388                skip = parseToken( ch );
2389            }
2390            catch( IllegalDataException e )
2391            {
2392                log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2393
2394                makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2395            }
2396
2397            //
2398            //   The idea is as follows:  If the handler method returns
2399            //   an element (el != null), it is assumed that it has been
2400            //   added in the stack.  Otherwise the character is added
2401            //   as is to the plaintext buffer.
2402            //
2403            //   For the transition phase, if s != null, it also gets
2404            //   added in the plaintext buffer.
2405            //
2406
2407            switch( skip )
2408            {
2409                case ELEMENT:
2410                    m_newLine = false;
2411                    break;
2412
2413                case CHARACTER:
2414                    m_plainTextBuf.append( (char) ch );
2415                    m_newLine = false;
2416                    break;
2417
2418                case IGNORE:
2419                default:
2420                    break;
2421            }
2422        }
2423
2424        closeHeadings();
2425        popElement("domroot");
2426    }
2427
2428    private String cleanupSuspectData( String s )
2429    {
2430        StringBuilder sb = new StringBuilder( s.length() );
2431
2432        for( int i = 0; i < s.length(); i++ )
2433        {
2434            char c = s.charAt(i);
2435
2436            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2437            else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2438        }
2439
2440        return sb.toString();
2441    }
2442
2443    /** The token is a plain character. */
2444    protected static final int CHARACTER = 0;
2445
2446    /** The token is a wikimarkup element. */
2447    protected static final int ELEMENT   = 1;
2448
2449    /** The token is to be ignored. */
2450    protected static final int IGNORE    = 2;
2451
2452    /**
2453     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
2454     *  you think this was a wiki markup element, and IGNORE, if you think
2455     *  we should ignore this altogether.
2456     *  <p>
2457     *  To add your own MarkupParser, you can override this method, but it
2458     *  is recommended that you call super.parseToken() as well to gain advantage
2459     *  of JSPWiki's own markup.  You can call it at the start of your own
2460     *  parseToken() or end - it does not matter.
2461     *
2462     * @param ch The character under investigation
2463     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2464     * @throws IOException If parsing fails.
2465     */
2466    protected int parseToken( int ch )
2467        throws IOException
2468    {
2469        Element el = null;
2470
2471        //
2472        //  Now, check the incoming token.
2473        //
2474        switch( ch )
2475        {
2476          case '\r':
2477            // DOS linefeeds we forget
2478            return IGNORE;
2479
2480          case '\n':
2481            //
2482            //  Close things like headings, etc.
2483            //
2484
2485            // FIXME: This is not really very fast
2486
2487            closeHeadings();
2488
2489            popElement("dl"); // Close definition lists.
2490            if( m_istable )
2491            {
2492                popElement("tr");
2493            }
2494
2495            m_isdefinition = false;
2496
2497            if( m_newLine )
2498            {
2499                // Paragraph change.
2500                startBlockLevel();
2501
2502                //
2503                //  Figure out which elements cannot be enclosed inside
2504                //  a <p></p> pair according to XHTML rules.
2505                //
2506                String nextLine = peekAheadLine();
2507                if( nextLine.length() == 0 ||
2508                    (nextLine.length() > 0 &&
2509                     !nextLine.startsWith("{{{") &&
2510                     !nextLine.startsWith("----") &&
2511                     !nextLine.startsWith("%%") &&
2512                     "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2513                {
2514                    pushElement( new Element("p") );
2515                    m_isOpenParagraph = true;
2516
2517                    if( m_restartitalic )
2518                    {
2519                        pushElement( new Element("i") );
2520                        m_isitalic = true;
2521                        m_restartitalic = false;
2522                    }
2523                    if( m_restartbold )
2524                    {
2525                        pushElement( new Element("b") );
2526                        m_isbold = true;
2527                        m_restartbold = false;
2528                    }
2529                }
2530            }
2531            else
2532            {
2533                m_plainTextBuf.append("\n");
2534                m_newLine = true;
2535            }
2536            return IGNORE;
2537
2538
2539          case '\\':
2540            el = handleBackslash();
2541            break;
2542
2543          case '_':
2544            el = handleUnderscore();
2545            break;
2546
2547          case '\'':
2548            el = handleApostrophe();
2549            break;
2550
2551          case '{':
2552            el = handleOpenbrace( m_newLine );
2553            break;
2554
2555          case '}':
2556            el = handleClosebrace();
2557            break;
2558
2559          case '-':
2560            if( m_newLine )
2561                el = handleDash();
2562
2563            break;
2564
2565          case '!':
2566            if( m_newLine )
2567            {
2568                el = handleHeading();
2569            }
2570            break;
2571
2572          case ';':
2573            if( m_newLine )
2574            {
2575                el = handleDefinitionList();
2576            }
2577            break;
2578
2579          case ':':
2580            if( m_isdefinition )
2581            {
2582                popElement("dt");
2583                el = pushElement( new Element("dd") );
2584                m_isdefinition = false;
2585            }
2586            break;
2587
2588          case '[':
2589            el = handleOpenbracket();
2590            break;
2591
2592          case '*':
2593            if( m_newLine )
2594            {
2595                pushBack('*');
2596                el = handleGeneralList();
2597            }
2598            break;
2599
2600          case '#':
2601            if( m_newLine )
2602            {
2603                pushBack('#');
2604                el = handleGeneralList();
2605            }
2606            break;
2607
2608          case '|':
2609            el = handleBar( m_newLine );
2610            break;
2611
2612          case '~':
2613            el = handleTilde();
2614            break;
2615
2616          case '%':
2617            el = handleDiv( m_newLine );
2618            break;
2619
2620          case '/':
2621            el = handleSlash( m_newLine );
2622            break;
2623
2624          default:
2625            break;
2626        }
2627
2628        return el != null ? ELEMENT : CHARACTER;
2629    }
2630
2631    private void closeHeadings()
2632    {
2633        if( m_lastHeading != null && !m_wysiwygEditorMode )
2634        {
2635            // Add the hash anchor element at the end of the heading
2636            addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2637            m_lastHeading = null;
2638        }
2639        popElement("h2");
2640        popElement("h3");
2641        popElement("h4");
2642    }
2643
2644    /**
2645     *  Parses the entire document from the Reader given in the constructor or
2646     *  set by {@link #setInputReader(Reader)}.
2647     *
2648     *  @return A WikiDocument, ready to be passed to the renderer.
2649     *  @throws IOException If parsing cannot be accomplished.
2650     */
2651    @Override
2652    public WikiDocument parse()
2653        throws IOException
2654    {
2655        WikiDocument d = new WikiDocument( m_context.getPage() );
2656        d.setContext( m_context );
2657
2658        Element rootElement = new Element("domroot");
2659
2660        d.setRootElement( rootElement );
2661
2662        fillBuffer( rootElement );
2663
2664        paragraphify(rootElement);
2665
2666        return d;
2667    }
2668
2669    /**
2670     *  Checks out that the first paragraph is correctly installed.
2671     *
2672     *  @param rootElement
2673     */
2674    private void paragraphify(Element rootElement)
2675    {
2676        //
2677        //  Add the paragraph tag to the first paragraph
2678        //
2679        List< Content > kids = rootElement.getContent();
2680
2681        if( rootElement.getChild("p") != null )
2682        {
2683            ArrayList<Content> ls = new ArrayList<>();
2684            int idxOfFirstContent = 0;
2685            int count = 0;
2686
2687            for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ )
2688            {
2689                Content c = i.next();
2690                if( c instanceof Element )
2691                {
2692                    String name = ( ( Element )c ).getName();
2693                    if( isBlockLevel( name ) ) break;
2694                }
2695
2696                if( !(c instanceof ProcessingInstruction) )
2697                {
2698                    ls.add( c );
2699                    if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2700                }
2701            }
2702
2703            //
2704            //  If there were any elements, then add a new <p> (unless it would
2705            //  be an empty one)
2706            //
2707            if( ls.size() > 0 )
2708            {
2709                Element newel = new Element("p");
2710
2711                for( Iterator< Content > i = ls.iterator(); i.hasNext(); )
2712                {
2713                    Content c = i.next();
2714
2715                    c.detach();
2716                    newel.addContent(c);
2717                }
2718
2719                //
2720                // Make sure there are no empty <p/> tags added.
2721                //
2722                if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2723                    rootElement.addContent(idxOfFirstContent, newel);
2724            }
2725        }
2726    }
2727
2728}