001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import java.io.IOException;
022import java.io.Reader;
023import java.io.StringReader;
024import java.text.MessageFormat;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.EmptyStackException;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Map;
033import java.util.Properties;
034import java.util.ResourceBundle;
035import java.util.Stack;
036
037import javax.xml.transform.Result;
038
039import org.apache.commons.lang.StringEscapeUtils;
040import org.apache.commons.lang.StringUtils;
041import org.apache.log4j.Logger;
042import org.apache.oro.text.regex.MalformedPatternException;
043import org.apache.oro.text.regex.MatchResult;
044import org.apache.oro.text.regex.Pattern;
045import org.apache.oro.text.regex.PatternCompiler;
046import org.apache.oro.text.regex.PatternMatcher;
047import org.apache.oro.text.regex.Perl5Compiler;
048import org.apache.oro.text.regex.Perl5Matcher;
049import org.apache.wiki.InternalWikiException;
050import org.apache.wiki.StringTransmutator;
051import org.apache.wiki.WikiContext;
052import org.apache.wiki.WikiPage;
053import org.apache.wiki.api.exceptions.PluginException;
054import org.apache.wiki.api.plugin.WikiPlugin;
055import org.apache.wiki.auth.WikiSecurityException;
056import org.apache.wiki.auth.acl.Acl;
057import org.apache.wiki.i18n.InternationalizationManager;
058import org.apache.wiki.preferences.Preferences;
059import org.apache.wiki.render.CleanTextRenderer;
060import org.apache.wiki.render.RenderingManager;
061import org.apache.wiki.util.TextUtil;
062import org.jdom2.Attribute;
063import org.jdom2.Content;
064import org.jdom2.Element;
065import org.jdom2.IllegalDataException;
066import org.jdom2.ProcessingInstruction;
067import org.jdom2.Verifier;
068
069/**
 *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
 *  heart and soul of JSPWiki: make sure you properly test anything that is added,
 *  or else it breaks down horribly.
073 *
074 *  @since  2.4
075 */
076public class JSPWikiMarkupParser extends MarkupParser {
077
    //
    //  Link type identifiers.  makeLink() switches on these values to decide
    //  which element to build, and createAnchor() uses the value as an index
    //  into CLASS_TYPES to select the CSS class of the generated anchor.
    //
    protected static final int              READ          = 0;  // Anchor to the VIEW URL of a page
    protected static final int              EDIT          = 1;  // Anchor to the EDIT URL of a page, with a "create" title
    protected static final int              EMPTY         = 2;  // Empty link target: the text is rendered inside a <u> element
    protected static final int              LOCAL         = 3;  // Footnote target: a named anchor ("ref-<context name>-<n>")
    protected static final int              LOCALREF      = 4;  // Reference to a footnote: anchor to "#ref-<context name>-<n>"
    protected static final int              IMAGE         = 5;  // Inline <img>
    protected static final int              EXTERNAL      = 6;  // Anchor to the link as given; may get rel="nofollow"
    protected static final int              INTERWIKI     = 7;  // Anchor to the link as given
    protected static final int              IMAGELINK     = 8;  // Inline <img> wrapped in an anchor whose href is the link text
    protected static final int              IMAGEWIKILINK = 9;  // Inline <img> wrapped in an anchor to the VIEW URL of the link text
    protected static final int              ATTACHMENT    = 10; // Anchor to the ATTACH URL, optionally followed by an info icon link
089
090    private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );
091
    //
    //  Parser state flags.  Only m_isEscaping is read in the part of the class
    //  that deals with plain text: while it is set, flushPlainText() does not
    //  attempt CamelCase/URI auto-linking.  The other flags are presumably
    //  toggled by the markup handlers further down in this class -- TODO confirm.
    //
    private boolean        m_isbold       = false;
    private boolean        m_isitalic     = false;
    private boolean        m_istable      = false;
    private boolean        m_isPre        = false;
    private boolean        m_isEscaping   = false;
    private boolean        m_isdefinition = false;
    private boolean        m_isPreBlock   = false;

    /** Contains style information, in multiple forms. */
    private Stack<Boolean> m_styleStack   = new Stack<Boolean>();

    // general list handling
    private int            m_genlistlevel = 0;
    private StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
    private boolean        m_allowPHPWikiStyleLists = true;

    // NOTE(review): presumably true while a paragraph element is open -- usage is not visible here.
    private boolean        m_isOpenParagraph = false;

    /** Parser for extended link functionality. */
    private LinkParser     m_linkParser = new LinkParser();

    /** Keeps track of any plain text that gets put in the Text nodes; drained by flushPlainText(). */
    private StringBuilder  m_plainTextBuf = new StringBuilder(20);

    /** The element new content is appended to; moved by pushElement() and popElement(). */
    private Element        m_currentElement;

    /** Keep track of duplicate header names.  */
    private Map<String, Integer>   m_titleSectionCounter = new HashMap<String, Integer>();

    /** If true, consider CamelCase hyperlinks as well. */
    public static final String     PROP_CAMELCASELINKS   = "jspwiki.translatorReader.camelCaseLinks";

    /** If true, all hyperlinks are translated as well, regardless whether they
        are surrounded by brackets. */
    public static final String     PROP_PLAINURIS        = "jspwiki.translatorReader.plainUris";

    /** If true, all outward attachment info links have a small link image appended. */
    public static final String     PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

    /** If true, then considers CamelCase links as well. */
    private boolean                m_camelCaseLinks      = false;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean                m_wysiwygEditorMode     = false;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean                m_plainUris           = false;

    /** If true, all outward links use a small link image. */
    private boolean                m_useOutlinkImage     = true;

    /** If true, makeLink() emits an "info" icon link for ATTACHMENT links. */
    private boolean                m_useAttachmentImage  = true;

    /** If true, allows raw HTML; if false, flushPlainText() escapes HTML entities in plain text. */
    private boolean                m_allowHTML           = false;

    /** If true, makeLink() adds rel="nofollow" to EXTERNAL links. */
    private boolean                m_useRelNofollow      = false;

    /** Compiles WIKIWORD_REGEX in initialize() when no cached pattern is available. */
    private PatternCompiler        m_compiler = new Perl5Compiler();

    //
    //  Matches either a CamelCase word or a bare http/https/mailto URI.
    //  Group 1 is the non-alphanumeric prefix (or line start), group 2 the
    //  whole word or URI, group 3 the protocol and group 4 the rest of the URI.
    //
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    private PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();
    /** Compiled form of WIKIWORD_REGEX; fetched from or stored into the engine in initialize(). */
    private Pattern                m_camelCasePattern;

    // NOTE(review): presumably the current table row number -- usage is not visible here.
    private int                    m_rowNum              = 1;

    // NOTE(review): presumably the most recently parsed heading -- usage is not visible here.
    private Heading                m_lastHeading         = null;

    /** Name of the engine attribute under which the compiled CamelCase pattern is cached. */
    private static final String CAMELCASE_PATTERN     = "JSPWikiMarkupParser.camelCasePattern";
163
    /**
     *  Creates a markup parser.  The arguments are handed to the superclass
     *  constructor, after which {@code initialize()} reads the parser
     *  configuration.
     *
     *  @param context The WikiContext which controls the parsing
     *  @param in Where the data is read from.
     */
    public JSPWikiMarkupParser( WikiContext context, Reader in )
    {
        super( context, in );
        initialize();
    }
175
176    // FIXME: parsers should be pooled for better performance.
177    private void initialize()
178    {
179        initInlineImagePatterns();
180
181        m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN );
182        if( m_camelCasePattern == null )
183        {
184            try
185            {
186                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,
187                                                         Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
188            }
189            catch( MalformedPatternException e )
190            {
191                log.fatal("Internal error: Someone put in a faulty pattern.",e);
192                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
193            }
194            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
195        }
196        //
197        //  Set the properties.
198        //
199        Properties props      = m_engine.getWikiProperties();
200
201        String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
202
203        if( cclinks != null )
204        {
205            m_camelCaseLinks = TextUtil.isPositive( cclinks );
206        }
207        else
208        {
209            m_camelCaseLinks  = TextUtil.getBooleanProperty( props,
210                                                             PROP_CAMELCASELINKS,
211                                                             m_camelCaseLinks );
212        }
213
214        Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE );
215        if( wysiwygVariable != null )
216        {
217            m_wysiwygEditorMode = wysiwygVariable.booleanValue();
218        }
219
220        m_plainUris           = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
221        m_useOutlinkImage     = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
222        m_useAttachmentImage  = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
223        m_allowHTML           = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
224        m_useRelNofollow      = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
225
226        if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null )
227        {
228            disableAccessRules();
229        }
230
231        m_context.getPage().setHasMetadata();
232    }
233
234    /**
235     *  Calls a transmutator chain.
236     *
237     *  @param list Chain to call
238     *  @param text Text that should be passed to the mutate() method
239     *              of each of the mutators in the chain.
240     *  @return The result of the mutation.
241     */
242
243    protected String callMutatorChain( Collection list, String text )
244    {
245        if( list == null || list.size() == 0 )
246        {
247            return text;
248        }
249
250        for( Iterator i = list.iterator(); i.hasNext(); )
251        {
252            StringTransmutator m = (StringTransmutator) i.next();
253
254            text = m.mutate( m_context, text );
255        }
256
257        return text;
258    }
259
260    /**
261     * Calls the heading listeners.
262     *
263     * @param param A Heading object.
264     */
265    protected void callHeadingListenerChain( Heading param )
266    {
267        List< HeadingListener > list = m_headingListenerChain;
268
269        for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); )
270        {
271            HeadingListener h = i.next();
272
273            h.headingAdded( m_context, param );
274        }
275    }
276
277    /**
278     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
279     *  if you really know what you are doing.
280     *
281     *  @param type One of the types above
282     *  @param link URL to which to link to
283     *  @param text Link text
284     *  @param section If a particular section identifier is required.
285     *  @return An A element.
286     *  @since 2.4.78
287     */
288    protected Element createAnchor(int type, String link, String text, String section)
289    {
290        text = escapeHTMLEntities( text );
291        section = escapeHTMLEntities( section );
292        Element el = new Element("a");
293        el.setAttribute("class",CLASS_TYPES[type]);
294        el.setAttribute("href",link+section);
295        el.addContent(text);
296        return el;
297    }
298
299    private Element makeLink( int type, String link, String text, String section, Iterator attributes )
300    {
301        Element el = null;
302
303        if( text == null ) text = link;
304
305        text = callMutatorChain( m_linkMutators, text );
306
307        section = (section != null) ? ("#"+section) : "";
308
309        // Make sure we make a link name that can be accepted
310        // as a valid URL.
311
312        if( link.length() == 0 )
313        {
314            type = EMPTY;
315        }
316        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
317
318        switch(type)
319        {
320            case READ:
321                el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
322                break;
323
324            case EDIT:
325                el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
326                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
327
328                break;
329
330            case EMPTY:
331                el = new Element("u").addContent(text);
332                break;
333
334                //
335                //  These two are for local references - footnotes and
336                //  references to footnotes.
337                //  We embed the page name (or whatever WikiContext gives us)
338                //  to make sure the links are unique across Wiki.
339                //
340            case LOCALREF:
341                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
342                break;
343
344            case LOCAL:
345                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
346                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
347                el.addContent("["+text+"]");
348                break;
349
350                //
351                //  With the image, external and interwiki types we need to
352                //  make sure nobody can put in Javascript or something else
353                //  annoying into the links themselves.  We do this by preventing
354                //  a haxor from stopping the link name short with quotes in
355                //  fillBuffer().
356                //
357            case IMAGE:
358                el = new Element("img").setAttribute("class","inline");
359                el.setAttribute("src",link);
360                el.setAttribute("alt",text);
361                break;
362
363            case IMAGELINK:
364                el = new Element("img").setAttribute("class","inline");
365                el.setAttribute("src",link);
366                el.setAttribute("alt",text);
367                el = createAnchor(IMAGELINK,text,"","").addContent(el);
368                break;
369
370            case IMAGEWIKILINK:
371                String pagelink = m_context.getURL(WikiContext.VIEW,text);
372                el = new Element("img").setAttribute("class","inline");
373                el.setAttribute("src",link);
374                el.setAttribute("alt",text);
375                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
376                break;
377
378            case EXTERNAL:
379                el = createAnchor( EXTERNAL, link, text, section );
380                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
381                break;
382
383            case INTERWIKI:
384                el = createAnchor( INTERWIKI, link, text, section );
385                break;
386
387            case ATTACHMENT:
388                String attlink = m_context.getURL( WikiContext.ATTACH,
389                                                   link );
390
391                String infolink = m_context.getURL( WikiContext.INFO,
392                                                    link );
393
394                String imglink = m_context.getURL( WikiContext.NONE,
395                                                   "images/attachment_small.png" );
396
397                el = createAnchor( ATTACHMENT, attlink, text, "" );
398
399                pushElement(el);
400                popElement(el.getName());
401
402                if( m_useAttachmentImage )
403                {
404                    el = new Element("img").setAttribute("src",imglink);
405                    el.setAttribute("border","0");
406                    el.setAttribute("alt","(info)");
407
408                    el = new Element("a").setAttribute("href",infolink).addContent(el);
409                    el.setAttribute("class","infolink");
410                }
411                else
412                {
413                    el = null;
414                }
415                break;
416
417            default:
418                break;
419        }
420
421        if( el != null && attributes != null )
422        {
423            while( attributes.hasNext() )
424            {
425                Attribute attr = (Attribute)attributes.next();
426                if( attr != null )
427                {
428                    el.setAttribute(attr);
429                }
430            }
431        }
432
433        if( el != null )
434        {
435            flushPlainText();
436            m_currentElement.addContent( el );
437        }
438        return el;
439    }
440
441    /**
442     *  Figures out if a link is an off-site link.  This recognizes
443     *  the most common protocols by checking how it starts.
444     *
445     *  @param link The link to check.
446     *  @return true, if this is a link outside of this wiki.
447     *  @since 2.4
448     *  @deprecated - use {@link LinkParsingOperations#isExternalLink(String)} instead.
449     */
450    @Deprecated
451    public static boolean isExternalLink( String link )
452    {
453        return new LinkParsingOperations(null).isExternalLink( link );
454    }
455
456    /**
457     *  Returns true if the link is really command to insert
458     *  a plugin.
459     *  <P>
460     *  Currently we just check if the link starts with "{INSERT",
461     *  or just plain "{" but not "{$".
462     *
463     *  @param link Link text, i.e. the contents of text between [].
464     *  @return True, if this link seems to be a command to insert a plugin here.
465     *  @deprecated Use {@link LinkParsingOperations#isPluginLink(String)} instead,
466     */
467    @Deprecated
468    public static boolean isPluginLink( String link )
469    {
470        return new LinkParsingOperations( null ).isPluginLink( link );
471    }
472
473    /**
474     *  These are all of the HTML 4.01 block-level elements.
475     */
476    private static final String[] BLOCK_ELEMENTS = {
477        "address", "blockquote", "div", "dl", "fieldset", "form",
478        "h1", "h2", "h3", "h4", "h5", "h6",
479        "hr", "noscript", "ol", "p", "pre", "table", "ul"
480    };
481
482    private static boolean isBlockLevel( String name )
483    {
484        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
485    }
486
487    /**
488     *  This method peeks ahead in the stream until EOL and returns the result.
489     *  It will keep the buffers untouched.
490     *
491     *  @return The string from the current position to the end of line.
492     */
493
494    // FIXME: Always returns an empty line, even if the stream is full.
495    private String peekAheadLine()
496        throws IOException
497    {
498        String s = readUntilEOL().toString();
499
500        if( s.length() > PUSHBACK_BUFFER_SIZE )
501        {
502            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
503            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
504        }
505        else
506        {
507            try
508            {
509                pushBack( s );
510            }
511            catch( IOException e )
512            {
513                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
514            }
515        }
516        return s;
517    }
518
519    private int flushPlainText()
520    {
521        int numChars = m_plainTextBuf.length();
522
523        if( numChars > 0 )
524        {
525            String buf;
526
527            if( !m_allowHTML )
528            {
529                buf = escapeHTMLEntities(m_plainTextBuf.toString());
530            }
531            else
532            {
533                buf = m_plainTextBuf.toString();
534            }
535            //
536            //  We must first empty the buffer because the side effect of
537            //  calling makeCamelCaseLink() is to call this routine.
538            //
539
540            m_plainTextBuf = new StringBuilder(20);
541
542            try
543            {
544                //
545                //  This is the heaviest part of parsing, and therefore we can
546                //  do some optimization here.
547                //
548                //  1) Only when the length of the buffer is big enough, we try to do the match
549                //
550
551                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
552                {
553                    // System.out.println("Buffer="+buf);
554
555                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
556                    {
557                        MatchResult result = m_camelCaseMatcher.getMatch();
558
559                        String firstPart = buf.substring(0,result.beginOffset(0));
560                        String prefix = result.group(1);
561
562                        if( prefix == null ) prefix = "";
563
564                        String camelCase = result.group(2);
565                        String protocol  = result.group(3);
566                        String uri       = protocol+result.group(4);
567                        buf              = buf.substring(result.endOffset(0));
568
569                        m_currentElement.addContent( firstPart );
570
571                        //
572                        //  Check if the user does not wish to do URL or WikiWord expansion
573                        //
574                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
575                        {
576                            if( prefix.endsWith("~") )
577                            {
578                                if( m_wysiwygEditorMode )
579                                {
580                                    m_currentElement.addContent( "~" );
581                                }
582                                prefix = prefix.substring(0,prefix.length()-1);
583                            }
584                            if( camelCase != null )
585                            {
586                                m_currentElement.addContent( prefix+camelCase );
587                            }
588                            else if( protocol != null )
589                            {
590                                m_currentElement.addContent( prefix+uri );
591                            }
592                            continue;
593                        }
594
595                        //
596                        //  Fine, then let's check what kind of a link this was
597                        //  and emit the proper elements
598                        //
599                        if( protocol != null )
600                        {
601                            char c = uri.charAt(uri.length()-1);
602                            if( c == '.' || c == ',' )
603                            {
604                                uri = uri.substring(0,uri.length()-1);
605                                buf = c + buf;
606                            }
607                            // System.out.println("URI match "+uri);
608                            m_currentElement.addContent( prefix );
609                            makeDirectURILink( uri );
610                        }
611                        else
612                        {
613                            // System.out.println("Matched: '"+camelCase+"'");
614                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
615                            // System.out.println("prefix="+prefix);
616                            m_currentElement.addContent( prefix );
617
618                            makeCamelCaseLink( camelCase );
619                        }
620                    }
621
622                    m_currentElement.addContent( buf );
623                }
624                else
625                {
626                    //
627                    //  No camelcase asked for, just add the elements
628                    //
629                    m_currentElement.addContent( buf );
630                }
631            }
632            catch( IllegalDataException e )
633            {
634                //
635                // Sometimes it's possible that illegal XML chars is added to the data.
636                // Here we make sure it does not stop parsing.
637                //
638                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
639            }
640        }
641
642        return numChars;
643    }
644
645    /**
646     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
647     *  entities that are already escaped).
648     *
649     *  @param buf
650     *  @return An escaped string.
651     */
652    private String escapeHTMLEntities(String buf)
653    {
654        StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
655
656        for( int i = 0; i < buf.length(); i++ )
657        {
658            char ch = buf.charAt(i);
659
660            if( ch == '<' )
661            {
662                tmpBuf.append("&lt;");
663            }
664            else if( ch == '>' )
665            {
666                tmpBuf.append("&gt;");
667            }
668            else if( ch == '\"' )
669            {
670                tmpBuf.append("&quot;");
671            }
672            else if( ch == '&' )
673            {
674                //
675                //  If the following is an XML entity reference (&#.*;) we'll
676                //  leave it as it is; otherwise we'll replace it with an &amp;
677                //
678
679                boolean isEntity = false;
680                StringBuilder entityBuf = new StringBuilder();
681
682                if( i < buf.length() -1 )
683                {
684                    for( int j = i; j < buf.length(); j++ )
685                    {
686                        char ch2 = buf.charAt(j);
687
688                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
689                        {
690                            entityBuf.append(ch2);
691
692                            if( ch2 == ';' )
693                            {
694                                isEntity = true;
695                                break;
696                            }
697                        }
698                        else
699                        {
700                            break;
701                        }
702                    }
703                }
704
705                if( isEntity )
706                {
707                    tmpBuf.append( entityBuf );
708                    i = i + entityBuf.length() - 1;
709                }
710                else
711                {
712                    tmpBuf.append("&amp;");
713                }
714
715            }
716            else
717            {
718                tmpBuf.append( ch );
719            }
720        }
721
722        return tmpBuf.toString();
723    }
724
725    private Element pushElement( Element e )
726    {
727        flushPlainText();
728        m_currentElement.addContent( e );
729        m_currentElement = e;
730
731        return e;
732    }
733
734    private Element addElement( Content e )
735    {
736        if( e != null )
737        {
738            flushPlainText();
739            m_currentElement.addContent( e );
740        }
741        return m_currentElement;
742    }
743
    /**
     *  All elements that can be empty by the HTML DTD.  popElement() does not
     *  add the empty-string filler to elements that are listed here.
     */
    //  Keep sorted: popElement() looks names up with Arrays.binarySearch().
    private static final String[] EMPTY_ELEMENTS = {
        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
    };
751
752    /**
753     *  Goes through the current element stack and pops all elements until this
754     *  element is found - this essentially "closes" and element.
755     *
756     *  @param s
757     *  @return The new current element, or null, if there was no such element in the entire stack.
758     */
759    private Element popElement( String s )
760    {
761        int flushedBytes = flushPlainText();
762
763        Element currEl = m_currentElement;
764
765        while( currEl.getParentElement() != null )
766        {
767            if( currEl.getName().equals(s) && !currEl.isRootElement() )
768            {
769                m_currentElement = currEl.getParentElement();
770
771                //
772                //  Check if it's okay for this element to be empty.  Then we will
773                //  trick the JDOM generator into not generating an empty element,
774                //  by putting an empty string between the tags.  Yes, it's a kludge
775                //  but what'cha gonna do about it. :-)
776                //
777
778                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
779                {
780                    currEl.addContent("");
781                }
782
783                return m_currentElement;
784            }
785
786            currEl = currEl.getParentElement();
787        }
788
789        return null;
790    }
791
792
793    /**
794     *  Reads the stream until it meets one of the specified
795     *  ending characters, or stream end.  The ending character will be left
796     *  in the stream.
797     */
798    private String readUntil( String endChars )
799        throws IOException
800    {
801        StringBuilder sb = new StringBuilder( 80 );
802        int ch = nextToken();
803
804        while( ch != -1 )
805        {
806            if( ch == '\\' )
807            {
808                ch = nextToken();
809                if( ch == -1 )
810                {
811                    break;
812                }
813            }
814            else
815            {
816                if( endChars.indexOf((char)ch) != -1 )
817                {
818                    pushBack( ch );
819                    break;
820                }
821            }
822            sb.append( (char) ch );
823            ch = nextToken();
824        }
825
826        return sb.toString();
827    }
828
829    /**
830     *  Reads the stream while the characters that have been specified are
831     *  in the stream, returning then the result as a String.
832     */
833    private String readWhile( String endChars )
834        throws IOException
835    {
836        StringBuilder sb = new StringBuilder( 80 );
837        int ch = nextToken();
838
839        while( ch != -1 )
840        {
841            if( endChars.indexOf((char)ch) == -1 )
842            {
843                pushBack( ch );
844                break;
845            }
846
847            sb.append( (char) ch );
848            ch = nextToken();
849        }
850
851        return sb.toString();
852    }
853
854    private JSPWikiMarkupParser m_cleanTranslator;
855
856    /**
857     *  Does a lazy init.  Otherwise, we would get into a situation
858     *  where HTMLRenderer would try and boot a TranslatorReader before
859     *  the TranslatorReader it is contained by is up.
860     */
861    private JSPWikiMarkupParser getCleanTranslator()
862    {
863        if( m_cleanTranslator == null )
864        {
865            WikiContext dummyContext = new WikiContext( m_engine,
866                                                        m_context.getHttpRequest(),
867                                                        m_context.getPage() );
868            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
869
870            m_cleanTranslator.m_allowHTML = true;
871        }
872
873        return m_cleanTranslator;
874    }
875    /**
876     *  Modifies the "hd" parameter to contain proper values.  Because
     *  an "id" attribute may only contain [a-zA-Z0-9:_-], we'll replace the
     *  '%' and '/' characters left after url encoding with '_'.
879     *  <p>
880     *  Counts also duplicate headings (= headings with similar name), and
881     *  attaches a counter.
882     */
883    private String makeHeadingAnchor( String baseName, String title, Heading hd )
884    {
885        hd.m_titleText = title;
886        title = MarkupParser.wikifyLink( title );
887
888        hd.m_titleSection = m_engine.encodeName(title);
889
890        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) )
891        {
892            Integer count = m_titleSectionCounter.get( hd.m_titleSection );
893            count = count + 1;
894            m_titleSectionCounter.put( hd.m_titleSection, count );
895            hd.m_titleSection += "-" + count;
896        }
897        else
898        {
899            m_titleSectionCounter.put( hd.m_titleSection, 1 );
900        }
901
902        hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+
903                           "-"+hd.m_titleSection;
904        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
905        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
906
907        return hd.m_titleAnchor;
908    }
909
910    private String makeSectionTitle( String title )
911    {
912        title = title.trim();
913        String outTitle;
914
915        try
916        {
917            JSPWikiMarkupParser dtr = getCleanTranslator();
918            dtr.setInputReader( new StringReader(title) );
919
920            CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse());
921
922            outTitle = ctt.getString();
923        }
924        catch( IOException e )
925        {
926            log.fatal("CleanTranslator not working", e);
927            throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e);
928        }
929
930        return outTitle;
931    }
932
933    /**
934     *  Returns XHTML for the heading.
935     *
936     *  @param level The level of the heading.  @see Heading
937     *  @param title the title for the heading
     *  @param hd the Heading object that is filled in with the level, title text, section name and anchor of this heading
939     *  @return An Element containing the heading
940     */
941    public Element makeHeading( int level, String title, Heading hd )
942    {
943        Element el = null;
944
945        String pageName = m_context.getPage().getName();
946
947        String outTitle = makeSectionTitle( title );
948
949        hd.m_level = level;
950
951        switch( level )
952        {
953          case Heading.HEADING_SMALL:
954            el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
955            break;
956
957          case Heading.HEADING_MEDIUM:
958            el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
959            break;
960
961          case Heading.HEADING_LARGE:
962            el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
963            break;
964
965          default:
966            throw new InternalWikiException("Illegal heading type "+level);
967        }
968
969
970        return el;
971    }
972
973    /**
974     *  When given a link to a WikiName, we just return
975     *  a proper HTML link for it.  The local link mutator
976     *  chain is also called.
977     */
978    private Element makeCamelCaseLink( String wikiname )
979    {
980        String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
981
982        callMutatorChain( m_localLinkMutatorChain, wikiname );
983
984        if( matchedLink != null ) {
985            makeLink( READ, matchedLink, wikiname, null, null );
986        } else {
987            makeLink( EDIT, wikiname, wikiname, null, null );
988        }
989
990        return m_currentElement;
991    }
992
993    /** Holds the image URL for the duration of this parser */
994    private String m_outlinkImageURL = null;
995
996    /**
997     *  Returns an element for the external link image (out.png).  However,
998     *  this method caches the URL for the lifetime of this MarkupParser,
999     *  because it's commonly used, and we'll end up with possibly hundreds
1000     *  our thousands of references to it...  It's a lot faster, too.
1001     *
1002     *  @return  An element containing the HTML for the outlink image.
1003     */
1004    private Element outlinkImage()
1005    {
1006        Element el = null;
1007
1008        if( m_useOutlinkImage )
1009        {
1010            if( m_outlinkImageURL == null )
1011            {
1012                m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE );
1013            }
1014
1015            el = new Element( "img" ).setAttribute( "class", OUTLINK );
1016            el.setAttribute( "src", m_outlinkImageURL );
1017            el.setAttribute( "alt","" );
1018        }
1019
1020        return el;
1021    }
1022
1023    /**
1024     *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
1025     *  because of the way that flushPlainText() works, it already encodes
1026     *  all of the XML entities.  But so does WikiContext.getURL(), so we
1027     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
1028     *  <p>
1029     *  What a crappy problem.
1030     *
     * @param url The URL to link to; a trailing comma or period is not treated as part of the URL.
1032     * @return An anchor Element containing the link.
1033     */
1034    private Element makeDirectURILink( String url )
1035    {
1036        Element result;
1037        String last = null;
1038
1039        if( url.endsWith(",") || url.endsWith(".") )
1040        {
1041            last = url.substring( url.length()-1 );
1042            url  = url.substring( 0, url.length()-1 );
1043        }
1044
1045        callMutatorChain( m_externalLinkMutatorChain, url );
1046
1047        if( m_linkParsingOperations.isImageLink( url ) )
1048        {
1049            result = handleImageLink( StringUtils.replace(url,"&amp;","&"), url, false );
1050        }
1051        else
1052        {
1053            result = makeLink( EXTERNAL, StringUtils.replace(url,"&amp;","&"), url, null, null );
1054            addElement( outlinkImage() );
1055        }
1056
1057        if( last != null )
1058        {
1059            m_plainTextBuf.append(last);
1060        }
1061
1062        return result;
1063    }
1064
1065    /**
1066     *  Image links are handled differently:
1067     *  1. If the text is a WikiName of an existing page,
1068     *     it gets linked.
1069     *  2. If the text is an external link, then it is inlined.
1070     *  3. Otherwise it becomes an ALT text.
1071     *
1072     *  @param reallink The link to the image.
1073     *  @param link     Link text portion, may be a link to somewhere else.
1074     *  @param hasLinkText If true, then the defined link had a link text available.
1075     *                  This means that the link text may be a link to a wiki page,
1076     *                  or an external resource.
1077     */
1078
1079    // FIXME: isExternalLink() is called twice.
1080    private Element handleImageLink( String reallink, String link, boolean hasLinkText )
1081    {
1082        String possiblePage = MarkupParser.cleanLink( link );
1083
1084        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText )
1085        {
1086            return makeLink( IMAGELINK, reallink, link, null, null );
1087        }
1088        else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText )
1089        {
1090            // System.out.println("Orig="+link+", Matched: "+matchedLink);
1091            callMutatorChain( m_localLinkMutatorChain, possiblePage );
1092
1093            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
1094        }
1095        else
1096        {
1097            return makeLink( IMAGE, reallink, link, null, null );
1098        }
1099    }
1100
1101    private Element handleAccessRule( String ruleLine )
1102    {
1103        if( m_wysiwygEditorMode )
1104        {
1105            m_currentElement.addContent( "[" + ruleLine + "]" );
1106        }
1107
1108        if( !m_parseAccessRules ) return m_currentElement;
1109        Acl acl;
1110        WikiPage          page = m_context.getRealPage();
1111        // UserDatabase      db = m_context.getEngine().getUserDatabase();
1112
1113        if( ruleLine.startsWith( "{" ) )
1114            ruleLine = ruleLine.substring( 1 );
1115        if( ruleLine.endsWith( "}" ) )
1116            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1117
1118        if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine);
1119
1120        try
1121        {
1122            acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1123
1124            page.setAcl( acl );
1125
1126            if( log.isDebugEnabled() ) log.debug( acl.toString() );
1127        }
1128        catch( WikiSecurityException wse )
1129        {
1130            return makeError( wse.getMessage() );
1131        }
1132
1133        return m_currentElement;
1134    }
1135
1136    /**
1137     *  Handles metadata setting [{SET foo=bar}]
1138     */
1139    private Element handleMetadata( String link )
1140    {
1141        if( m_wysiwygEditorMode )
1142        {
1143            m_currentElement.addContent( "[" + link + "]" );
1144        }
1145
1146        try
1147        {
1148            String args = link.substring( link.indexOf(' '), link.length()-1 );
1149
1150            String name = args.substring( 0, args.indexOf('=') );
1151            String val  = args.substring( args.indexOf('=')+1, args.length() );
1152
1153            name = name.trim();
1154            val  = val.trim();
1155
1156            if( val.startsWith("'") ) val = val.substring( 1 );
1157            if( val.endsWith("'") )   val = val.substring( 0, val.length()-1 );
1158
1159            // log.debug("SET name='"+name+"', value='"+val+"'.");
1160
1161            if( name.length() > 0 && val.length() > 0 )
1162            {
1163                val = m_engine.getVariableManager().expandVariables( m_context,
1164                                                                     val );
1165
1166                m_context.getPage().setAttribute( name, val );
1167            }
1168        }
1169        catch( Exception e )
1170        {
1171            ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1172            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1173        }
1174
1175        return m_currentElement;
1176    }
1177
1178    /**
1179     *  Emits a processing instruction that will disable markup escaping. This is
1180     *  very useful if you want to emit HTML directly into the stream.
1181     *
1182     */
1183    private void disableOutputEscaping()
1184    {
1185        addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") );
1186    }
1187
1188    /**
1189     *  Gobbles up all hyperlinks that are encased in square brackets.
1190     */
1191    private Element handleHyperlinks( String linktext, int pos )
1192    {
1193        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1194
1195        StringBuilder sb = new StringBuilder(linktext.length()+80);
1196
1197        if( m_linkParsingOperations.isAccessRule( linktext ) )
1198        {
1199            return handleAccessRule( linktext );
1200        }
1201
1202        if( m_linkParsingOperations.isMetadata( linktext ) )
1203        {
1204            return handleMetadata( linktext );
1205        }
1206
1207        if( m_linkParsingOperations.isPluginLink( linktext ) )
1208        {
1209            try
1210            {
1211                PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1212                //
1213                //  This might sometimes fail, especially if there is something which looks
1214                //  like a plugin invocation but is really not.
1215                //
1216                if( pluginContent != null )
1217                {
1218                    addElement( pluginContent );
1219
1220                    pluginContent.executeParse( m_context );
1221                }
1222            }
1223            catch( PluginException e )
1224            {
1225                log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
1226                //log.info( "Root cause:",e.getRootThrowable() );
1227                if( !m_wysiwygEditorMode )
1228                {
1229                    ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE );
1230                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
1231                                                                        m_context.getRealPage().getWiki(),
1232                                                                        m_context.getRealPage().getName(),
1233                                                                        e.getMessage() ) ) );
1234                }
1235            }
1236
1237            return m_currentElement;
1238        }
1239
1240        try
1241        {
1242            LinkParser.Link link = m_linkParser.parse(linktext);
1243            linktext       = link.getText();
1244            String linkref = link.getReference();
1245
1246            //
1247            //  Yes, we now have the components separated.
1248            //  linktext = the text the link should have
1249            //  linkref  = the url or page name.
1250            //
1251            //  In many cases these are the same.  [linktext|linkref].
1252            //
1253            if( m_linkParsingOperations.isVariableLink( linktext ) )
1254            {
1255                Content el = new VariableContent(linktext);
1256
1257                addElement( el );
1258            }
1259            else if( m_linkParsingOperations.isExternalLink( linkref ) )
1260            {
1261                // It's an external link, out of this Wiki
1262
1263                callMutatorChain( m_externalLinkMutatorChain, linkref );
1264
1265                if( m_linkParsingOperations.isImageLink( linkref ) )
1266                {
1267                    handleImageLink( linkref, linktext, link.hasReference() );
1268                }
1269                else
1270                {
1271                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1272                    addElement( outlinkImage() );
1273                }
1274            }
1275            else if( link.isInterwikiLink() )
1276            {
1277                // It's an interwiki link
1278                // InterWiki links also get added to external link chain
1279                // after the links have been resolved.
1280
1281                // FIXME: There is an interesting issue here:  We probably should
1282                //        URLEncode the wikiPage, but we can't since some of the
1283                //        Wikis use slashes (/), which won't survive URLEncoding.
1284                //        Besides, we don't know which character set the other Wiki
1285                //        is using, so you'll have to write the entire name as it appears
1286                //        in the URL.  Bugger.
1287
1288                String extWiki  = link.getExternalWiki();
1289                String wikiPage = link.getExternalWikiPage();
1290
1291                if( m_wysiwygEditorMode )
1292                {
1293                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1294                }
1295                else
1296                {
1297                    String urlReference = m_engine.getInterWikiURL( extWiki );
1298
1299                    if( urlReference != null )
1300                    {
1301                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1302                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1303
1304                        if( m_linkParsingOperations.isImageLink(urlReference) )
1305                        {
1306                            handleImageLink( urlReference, linktext, link.hasReference() );
1307                        }
1308                        else
1309                        {
1310                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1311                        }
1312
1313                        if( m_linkParsingOperations.isExternalLink(urlReference) )
1314                        {
1315                            addElement( outlinkImage() );
1316                        }
1317                    }
1318                    else
1319                    {
1320                        Object[] args = { extWiki };
1321                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1322                    }
1323                }
1324            }
1325            else if( linkref.startsWith("#") )
1326            {
1327                // It defines a local footnote
1328                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1329            }
1330            else if( TextUtil.isNumber( linkref ) )
1331            {
1332                // It defines a reference to a local footnote
1333                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1334            }
1335            else
1336            {
1337                int hashMark = -1;
1338
1339                //
1340                //  Internal wiki link, but is it an attachment link?
1341                //
1342                String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref );
1343                if( attachment != null )
1344                {
1345                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1346
1347                    if( m_linkParsingOperations.isImageLink( linkref ) )
1348                    {
1349                        attachment = m_context.getURL( WikiContext.ATTACH, attachment );
1350                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1351                    }
1352                    else
1353                    {
1354                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1355                    }
1356                }
1357                else if( (hashMark = linkref.indexOf('#')) != -1 )
1358                {
1359                    // It's an internal Wiki link, but to a named section
1360
1361                    String namedSection = linkref.substring( hashMark+1 );
1362                    linkref = linkref.substring( 0, hashMark );
1363
1364                    linkref = MarkupParser.cleanLink( linkref );
1365
1366                    callMutatorChain( m_localLinkMutatorChain, linkref );
1367
1368                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1369                    if( matchedLink != null ) {
1370                        String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection));
1371                        sectref = sectref.replace('%', '_');
1372                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1373                    } else {
1374                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1375                    }
1376                }
1377                else
1378                {
1379                    // It's an internal Wiki link
1380                    linkref = MarkupParser.cleanLink( linkref );
1381
1382                    callMutatorChain( m_localLinkMutatorChain, linkref );
1383
1384                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1385                    if( matchedLink != null ) {
1386                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1387                    } else {
1388                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1389                    }
1390                }
1391            }
1392        }
1393        catch( ParseException e )
1394        {
1395            log.info("Parser failure: ",e);
1396            Object[] args = { e.getMessage() };
1397            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1398        }
1399
1400        return m_currentElement;
1401    }
1402
1403    /**
1404     *  Pushes back any string that has been read.  It will obviously
1405     *  be pushed back in a reverse order.
1406     *
1407     *  @since 2.1.77
1408     */
1409    private void pushBack( String s )
1410        throws IOException
1411    {
1412        for( int i = s.length()-1; i >= 0; i-- )
1413        {
1414            pushBack( s.charAt(i) );
1415        }
1416    }
1417
1418    private Element handleBackslash()
1419        throws IOException
1420    {
1421        int ch = nextToken();
1422
1423        if( ch == '\\' )
1424        {
1425            int ch2 = nextToken();
1426
1427            if( ch2 == '\\' )
1428            {
1429                pushElement( new Element("br").setAttribute("clear","all"));
1430                return popElement("br");
1431            }
1432
1433            pushBack( ch2 );
1434
1435            pushElement( new Element("br") );
1436            return popElement("br");
1437        }
1438
1439        pushBack( ch );
1440
1441        return null;
1442    }
1443
1444    private Element handleUnderscore()
1445        throws IOException
1446    {
1447        int ch = nextToken();
1448        Element el = null;
1449
1450        if( ch == '_' )
1451        {
1452            if( m_isbold )
1453            {
1454                el = popElement("b");
1455            }
1456            else
1457            {
1458                el = pushElement( new Element("b") );
1459            }
1460            m_isbold = !m_isbold;
1461        }
1462        else
1463        {
1464            pushBack( ch );
1465        }
1466
1467        return el;
1468    }
1469
1470
1471    /**
1472     *  For example: italics.
1473     */
1474    private Element handleApostrophe()
1475        throws IOException
1476    {
1477        int ch = nextToken();
1478        Element el = null;
1479
1480        if( ch == '\'' )
1481        {
1482            if( m_isitalic )
1483            {
1484                el = popElement("i");
1485            }
1486            else
1487            {
1488                el = pushElement( new Element("i") );
1489            }
1490            m_isitalic = !m_isitalic;
1491        }
1492        else
1493        {
1494            pushBack( ch );
1495        }
1496
1497        return el;
1498    }
1499
1500    private Element handleOpenbrace( boolean isBlock )
1501        throws IOException
1502    {
1503        int ch = nextToken();
1504
1505        if( ch == '{' )
1506        {
1507            int ch2 = nextToken();
1508
1509            if( ch2 == '{' )
1510            {
1511                m_isPre = true;
1512                m_isEscaping = true;
1513                m_isPreBlock = isBlock;
1514
1515                if( isBlock )
1516                {
1517                    startBlockLevel();
1518                    return pushElement( new Element("pre") );
1519                }
1520
1521                return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") );
1522            }
1523
1524            pushBack( ch2 );
1525
1526            return pushElement( new Element("tt") );
1527        }
1528
1529        pushBack( ch );
1530
1531        return null;
1532    }
1533
1534    /**
1535     *  Handles both }} and }}}
1536     */
1537    private Element handleClosebrace()
1538        throws IOException
1539    {
1540        int ch2 = nextToken();
1541
1542        if( ch2 == '}' )
1543        {
1544            int ch3 = nextToken();
1545
1546            if( ch3 == '}' )
1547            {
1548                if( m_isPre )
1549                {
1550                    if( m_isPreBlock )
1551                    {
1552                        popElement( "pre" );
1553                    }
1554                    else
1555                    {
1556                        popElement( "span" );
1557                    }
1558
1559                    m_isPre = false;
1560                    m_isEscaping = false;
1561                    return m_currentElement;
1562                }
1563
1564                m_plainTextBuf.append("}}}");
1565                return m_currentElement;
1566            }
1567
1568            pushBack( ch3 );
1569
1570            if( !m_isEscaping )
1571            {
1572                return popElement("tt");
1573            }
1574        }
1575
1576        pushBack( ch2 );
1577
1578        return null;
1579    }
1580
1581    private Element handleDash()
1582        throws IOException
1583    {
1584        int ch = nextToken();
1585
1586        if( ch == '-' )
1587        {
1588            int ch2 = nextToken();
1589
1590            if( ch2 == '-' )
1591            {
1592                int ch3 = nextToken();
1593
1594                if( ch3 == '-' )
1595                {
1596                    // Empty away all the rest of the dashes.
1597                    // Do not forget to return the first non-match back.
1598                    do
1599                    {
1600                        ch = nextToken();
1601                    }
1602                    while ( ch == '-' );
1603
1604                    pushBack(ch);
1605                    startBlockLevel();
1606                    pushElement( new Element("hr") );
1607                    return popElement( "hr" );
1608                }
1609
1610                pushBack( ch3 );
1611            }
1612            pushBack( ch2 );
1613        }
1614
1615        pushBack( ch );
1616
1617        return null;
1618    }
1619
1620    private Element handleHeading()
1621        throws IOException
1622    {
1623        Element el = null;
1624
1625        int ch  = nextToken();
1626
1627        Heading hd = new Heading();
1628
1629        if( ch == '!' )
1630        {
1631            int ch2 = nextToken();
1632
1633            if( ch2 == '!' )
1634            {
1635                String title = peekAheadLine();
1636
1637                el = makeHeading( Heading.HEADING_LARGE, title, hd);
1638            }
1639            else
1640            {
1641                pushBack( ch2 );
1642                String title = peekAheadLine();
1643                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1644            }
1645        }
1646        else
1647        {
1648            pushBack( ch );
1649            String title = peekAheadLine();
1650            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1651        }
1652
1653        callHeadingListenerChain( hd );
1654
1655        m_lastHeading = hd;
1656
1657        if( el != null ) pushElement(el);
1658
1659        return el;
1660    }
1661
1662    /**
1663     *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1664     *  EOL from the stream.
1665     */
1666    private StringBuilder readUntilEOL()
1667        throws IOException
1668    {
1669        int ch;
1670        StringBuilder buf = new StringBuilder( 256 );
1671
1672        while( true )
1673        {
1674            ch = nextToken();
1675
1676            if( ch == -1 )
1677                break;
1678
1679            buf.append( (char) ch );
1680
1681            if( ch == '\n' )
1682                break;
1683        }
1684        return buf;
1685    }
1686
1687    /** Controls whether italic is restarted after a paragraph shift */
1688
    // Set by startBlockLevel() to the italic state that was in effect when the block started.
    private boolean m_restartitalic = false;
    // Set by startBlockLevel() to the bold state that was in effect when the block started.
    private boolean m_restartbold   = false;

    // NOTE(review): presumably tracks whether the parser is at the start of a line;
    // it is not used in this part of the file - confirm against its readers.
    private boolean m_newLine;
1693
1694    /**
1695     *  Starts a block level element, therefore closing
1696     *  a potential open paragraph tag.
1697     */
1698    private void startBlockLevel()
1699    {
1700        // These may not continue over block level limits in XHTML
1701
1702        popElement("i");
1703        popElement("b");
1704        popElement("tt");
1705
1706        if( m_isOpenParagraph )
1707        {
1708            m_isOpenParagraph = false;
1709            popElement("p");
1710            m_plainTextBuf.append("\n"); // Just small beautification
1711        }
1712
1713        m_restartitalic = m_isitalic;
1714        m_restartbold   = m_isbold;
1715
1716        m_isitalic = false;
1717        m_isbold   = false;
1718    }
1719
1720    private static String getListType( char c )
1721    {
1722        if( c == '*' )
1723        {
1724            return "ul";
1725        }
1726        else if( c == '#' )
1727        {
1728            return "ol";
1729        }
1730        throw new InternalWikiException("Parser got faulty list type: "+c);
1731    }
    // FIXME: Refactor this; it's a bit messy.

    /**
     *  Handles a list line.  Works like the original handleOrderedList() and
     *  handleUnorderedList(), but supports ordered ('#') and unordered ('*')
     *  bullets mixed together in the same bullet pattern.
     *  <p>
     *  The run of bullets at the current position is compared with the pattern
     *  remembered in m_genlistBulletBuffer, and ul/ol/li elements are pushed or
     *  popped until the open lists match the new pattern.  A new "li" is then
     *  pushed, spaces following the bullets are consumed and the new pattern
     *  replaces the remembered one.
     *
     *  @return The value of m_currentElement after the new list item has been pushed.
     *  @throws IOException If reading the input fails.
     */
    private Element handleGeneralList()
        throws IOException
    {
         startBlockLevel();

         // The bullet pattern of this line, e.g. "*", "##" or "*#".
         String strBullets = readWhile( "*#" );
         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
         int numBullets = strBullets.length();

         // override the beginning portion of bullet pattern to be like the previous
         // to simulate PHPWiki style lists

         if(m_allowPHPWikiStyleLists)
         {
             // only substitute if different: compare the part of both patterns
             // that is covered by the shorter of the two
             if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
                   (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
             {
                 if(numBullets <= m_genlistlevel)
                 {
                     // Substitute all but the last character (keep the expressed bullet preference)
                     strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
                                   + strBullets.substring(numBullets-1, numBullets);
                 }
                 else
                 {
                     // Deeper than before: take the whole previous pattern and keep
                     // only the additional bullets of this line.
                     strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
                 }
             }
         }

         //
         //  Check if this is still of the same type, i.e. the new pattern and the
         //  previous one agree on the part covered by the shorter of the two.
         //
         if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
            (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
         {
             if( numBullets > m_genlistlevel )
             {
                 // The first new list is opened inside the currently open element;
                 // every further level gets its own enclosing "li".
                 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );

                 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
                 {
                     // bullets are growing, get from new bullet list
                     pushElement( new Element("li") );
                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
                 }
             }
             else if( numBullets < m_genlistlevel )
             {
                 //  Close the previous list item.
                 popElement( "li" );

                 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
                 {
                     // bullets are shrinking, get from old bullet list

                     popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
                     if( m_genlistlevel > 0 )
                     {
                         popElement( "li" );
                     }

                 }
             }
             else
             {
                 // Same depth and same pattern: just close the previous item.
                 if( m_genlistlevel > 0 )
                 {
                     popElement( "li" );
                 }
             }
         }
         else
         {
             //
             //  The pattern has changed, unwind and restart
             //
             int  numEqualBullets;
             int  numCheckBullets;

             // find out how much is the same
             numEqualBullets = 0;
             numCheckBullets = Math.min(numBullets,m_genlistlevel);

             while( numEqualBullets < numCheckBullets )
             {
                 // if the bullets are equal so far, keep going
                 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
                     numEqualBullets++;
                 // otherwise giveup, we have found how many are equal
                 else
                     break;
             }

             // unwind: close the old lists down to the last level both patterns share
             for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
             {
                 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
                 if( m_genlistlevel > numBullets )
                 {
                     popElement("li");
                 }
             }

             // rewind: open the lists of the new pattern from the first differing level on

             pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
             for(int i = numEqualBullets; i < numBullets; i++)
             {
                 pushElement( new Element("li") );
                 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
             }
             m_genlistlevel = numBullets;
         }

         //
         //  Push a new list item, and eat away any extra whitespace
         //
         pushElement( new Element("li") );
         readWhile(" ");

         // work done, remember the new bullet list (in place of old one)
         m_genlistBulletBuffer.setLength(0);
         m_genlistBulletBuffer.append(strBullets);

         return m_currentElement;
    }
1868
1869    private Element unwindGeneralList()
1870    {
1871        //unwind
1872        for( ; m_genlistlevel > 0; m_genlistlevel-- )
1873        {
1874            popElement( "li" );
1875            popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1876        }
1877
1878        m_genlistBulletBuffer.setLength(0);
1879
1880        return null;
1881    }
1882
1883
1884    private Element handleDefinitionList()
1885        throws IOException
1886    {
1887        if( !m_isdefinition )
1888        {
1889            m_isdefinition = true;
1890
1891            startBlockLevel();
1892
1893            pushElement( new Element("dl") );
1894            return pushElement( new Element("dt") );
1895        }
1896
1897        return null;
1898    }
1899
1900    private Element handleOpenbracket()
1901        throws IOException
1902    {
1903        StringBuilder sb = new StringBuilder(40);
1904        int pos = getPosition();
1905        int ch = nextToken();
1906        boolean isPlugin = false;
1907
1908        if( ch == '[' )
1909        {
1910            if( m_wysiwygEditorMode )
1911            {
1912                sb.append( '[' );
1913            }
1914
1915            sb.append( (char)ch );
1916
1917            while( (ch = nextToken()) == '[' )
1918            {
1919                sb.append( (char)ch );
1920            }
1921        }
1922
1923
1924        if( ch == '{' )
1925        {
1926            isPlugin = true;
1927        }
1928
1929        pushBack( ch );
1930
1931        if( sb.length() > 0 )
1932        {
1933            m_plainTextBuf.append( sb );
1934            return m_currentElement;
1935        }
1936
1937        //
1938        //  Find end of hyperlink
1939        //
1940
1941        ch = nextToken();
1942        int nesting = 1;    // Check for nested plugins
1943
1944        while( ch != -1 )
1945        {
1946            int ch2 = nextToken(); pushBack(ch2);
1947
1948            if( isPlugin )
1949            {
1950                if( ch == '[' && ch2 == '{' )
1951                {
1952                    nesting++;
1953                }
1954                else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
1955                {
1956                    break;
1957                }
1958                else if( ch == '}' && ch2 == ']' )
1959                {
1960                    // NB: This will be decremented once at the end
1961                    nesting--;
1962                }
1963            }
1964            else
1965            {
1966                if( ch == ']' )
1967                {
1968                    break;
1969                }
1970            }
1971
1972            sb.append( (char) ch );
1973
1974            ch = nextToken();
1975        }
1976
1977        //
1978        //  If the link is never finished, do some tricks to display the rest of the line
1979        //  unchanged.
1980        //
1981        if( ch == -1 )
1982        {
1983            log.debug("Warning: unterminated link detected!");
1984            m_isEscaping = true;
1985            m_plainTextBuf.append( sb );
1986            flushPlainText();
1987            m_isEscaping = false;
1988            return m_currentElement;
1989        }
1990
1991        return handleHyperlinks( sb.toString(), pos );
1992    }
1993
1994    /**
1995     *  Reads the stream until the current brace is closed or stream end.
1996     */
1997    private String readBraceContent( char opening, char closing )
1998        throws IOException
1999    {
2000        StringBuilder sb = new StringBuilder(40);
2001        int braceLevel = 1;
2002        int ch;
2003        while(( ch = nextToken() ) != -1 )
2004        {
2005            if( ch == '\\' )
2006            {
2007                continue;
2008            }
2009            else if ( ch == opening )
2010            {
2011                braceLevel++;
2012            }
2013            else if ( ch == closing )
2014            {
2015                braceLevel--;
2016                if (braceLevel==0)
2017                {
2018                  break;
2019                }
2020            }
2021            sb.append( (char)ch );
2022        }
2023        return sb.toString();
2024    }
2025
2026
2027    /**
2028     *  Handles constructs of type %%(style) and %%class
2029     * @param newLine
2030     * @return An Element containing the div or span, depending on the situation.
2031     * @throws IOException
2032     */
2033    private Element handleDiv( boolean newLine )
2034        throws IOException
2035    {
2036        int ch = nextToken();
2037        Element el = null;
2038
2039        if( ch == '%' )
2040        {
2041            String style = null;
2042            String clazz = null;
2043
2044            ch = nextToken();
2045
2046            //
2047            //  Style or class?
2048            //
2049            if( ch == '(' )
2050            {
2051                style = readBraceContent('(',')');
2052            }
2053            else if( Character.isLetter( (char) ch ) )
2054            {
2055                pushBack( ch );
2056                clazz = readUntil( " \t\n\r" );
2057                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
2058                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
2059                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
2060                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
2061
2062                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
2063                //(2) remove any invalid character
2064                if( clazz != null){
2065
2066                    clazz = clazz.replace('.', ' ')
2067                                 .replaceAll("[^\\s-_\\w\\x200-\\x377]+","");
2068
2069                }
2070                ch = nextToken();
2071
2072                //
2073                //  Pop out only spaces, so that the upcoming EOL check does not check the
2074                //  next line.
2075                //
2076                if( ch == '\n' || ch == '\r' )
2077                {
2078                    pushBack(ch);
2079                }
2080            }
2081            else
2082            {
2083                //
2084                // Anything else stops.
2085                //
2086
2087                pushBack(ch);
2088
2089                try
2090                {
2091                    Boolean isSpan = m_styleStack.pop();
2092
2093                    if( isSpan == null )
2094                    {
2095                        // Fail quietly
2096                    }
2097                    else if( isSpan.booleanValue() )
2098                    {
2099                        el = popElement( "span" );
2100                    }
2101                    else
2102                    {
2103                        el = popElement( "div" );
2104                    }
2105                }
2106                catch( EmptyStackException e )
2107                {
2108                    log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
2109                    return m_currentElement;
2110                }
2111
2112                return el;
2113            }
2114
2115            //
2116            //  Check if there is an attempt to do something nasty
2117            //
2118
2119            try
2120            {
2121                style = StringEscapeUtils.unescapeHtml(style);
2122                if( style != null && style.indexOf("javascript:") != -1 )
2123                {
2124                    log.debug("Attempt to output javascript within CSS:"+style);
2125                    ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2126                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
2127                }
2128            }
2129            catch( NumberFormatException e )
2130            {
2131                //
2132                //  If there are unknown entities, we don't want the parser to stop.
2133                //
2134                ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2135                String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
2136                return addElement( makeError( msg ) );
2137            }
2138
2139            //
2140            //  Decide if we should open a div or a span?
2141            //
2142            String eol = peekAheadLine();
2143
2144            if( eol.trim().length() > 0 )
2145            {
2146                // There is stuff after the class
2147
2148                el = new Element("span");
2149
2150                m_styleStack.push( Boolean.TRUE );
2151            }
2152            else
2153            {
2154                startBlockLevel();
2155                el = new Element("div");
2156                m_styleStack.push( Boolean.FALSE );
2157            }
2158
2159            if( style != null ) el.setAttribute("style", style);
2160            if( clazz != null ) el.setAttribute("class", clazz);
2161            el = pushElement( el );
2162
2163            return el;
2164        }
2165
2166        pushBack(ch);
2167
2168        return el;
2169    }
2170
2171    private Element handleSlash( boolean newLine )
2172        throws IOException
2173    {
2174        int ch = nextToken();
2175
2176        pushBack(ch);
2177        if( ch == '%' && !m_styleStack.isEmpty() )
2178        {
2179            return handleDiv( newLine );
2180        }
2181
2182        return null;
2183    }
2184
2185    private Element handleBar( boolean newLine )
2186        throws IOException
2187    {
2188        Element el = null;
2189
2190        if( !m_istable && !newLine )
2191        {
2192            return null;
2193        }
2194
2195        //
2196        //  If the bar is in the first column, we will either start
2197        //  a new table or continue the old one.
2198        //
2199
2200        if( newLine )
2201        {
2202            if( !m_istable )
2203            {
2204                startBlockLevel();
2205                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2206                m_istable = true;
2207                m_rowNum = 0;
2208            }
2209
2210            m_rowNum++;
2211            Element tr = ( m_rowNum % 2 != 0 )
2212                       ? new Element("tr").setAttribute("class", "odd")
2213                       : new Element("tr");
2214            el = pushElement( tr );
2215        }
2216
2217        //
2218        //  Check out which table cell element to start;
2219        //  a header element (th) or a regular element (td).
2220        //
2221        int ch = nextToken();
2222
2223        if( ch == '|' )
2224        {
2225            if( !newLine )
2226            {
2227                el = popElement("th");
2228                if( el == null ) popElement("td");
2229            }
2230            el = pushElement( new Element("th") );
2231        }
2232        else
2233        {
2234            if( !newLine )
2235            {
2236                el = popElement("td");
2237                if( el == null ) popElement("th");
2238            }
2239
2240            el = pushElement( new Element("td") );
2241
2242            pushBack( ch );
2243        }
2244
2245        return el;
2246    }
2247
2248    /**
2249     *  Generic escape of next character or entity.
2250     */
2251    private Element handleTilde()
2252        throws IOException
2253    {
2254        int ch = nextToken();
2255
2256        if( ch == ' ' )
2257        {
2258            if( m_wysiwygEditorMode )
2259            {
2260                m_plainTextBuf.append( "~ " );
2261            }
2262            return m_currentElement;
2263        }
2264
2265        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2266            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2267            ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2268        {
2269            if( m_wysiwygEditorMode )
2270            {
2271                m_plainTextBuf.append( '~' );
2272            }
2273
2274            m_plainTextBuf.append( (char)ch );
2275            m_plainTextBuf.append(readWhile( ""+(char)ch ));
2276            return m_currentElement;
2277        }
2278
2279        // No escape.
2280        pushBack( ch );
2281
2282        return null;
2283    }
2284
2285    private void fillBuffer( Element startElement )
2286        throws IOException
2287    {
2288        m_currentElement = startElement;
2289
2290        boolean quitReading = false;
2291        m_newLine = true;
2292        disableOutputEscaping();
2293
2294        while(!quitReading)
2295        {
2296            int ch = nextToken();
2297
2298            if( ch == -1 ) break;
2299
2300            //
2301            //  Check if we're actually ending the preformatted mode.
2302            //  We still must do an entity transformation here.
2303            //
2304            if( m_isEscaping )
2305            {
2306                if( ch == '}' )
2307                {
2308                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2309                }
2310                else if( ch == -1 )
2311                {
2312                    quitReading = true;
2313                }
2314                else if( ch == '\r' )
2315                {
2316                    // DOS line feeds we ignore.
2317                }
2318                else if( ch == '<' )
2319                {
2320                    m_plainTextBuf.append( "&lt;" );
2321                }
2322                else if( ch == '>' )
2323                {
2324                    m_plainTextBuf.append( "&gt;" );
2325                }
2326                else if( ch == '&' )
2327                {
2328                    m_plainTextBuf.append( "&amp;" );
2329                }
2330                else if( ch == '~' )
2331                {
2332                    String braces = readWhile("}");
2333                    if( braces.length() >= 3 )
2334                    {
2335                        m_plainTextBuf.append("}}}");
2336
2337                        braces = braces.substring(3);
2338                    }
2339                    else
2340                    {
2341                        m_plainTextBuf.append( (char) ch );
2342                    }
2343
2344                    for( int i = braces.length()-1; i >= 0; i-- )
2345                    {
2346                        pushBack(braces.charAt(i));
2347                    }
2348                }
2349                else
2350                {
2351                    m_plainTextBuf.append( (char) ch );
2352                }
2353
2354                continue;
2355            }
2356
2357            //
2358            //  An empty line stops a list
2359            //
2360            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2361            {
2362                m_plainTextBuf.append(unwindGeneralList());
2363            }
2364
2365            if( m_newLine && ch != '|' && m_istable )
2366            {
2367                popElement("table");
2368                m_istable = false;
2369            }
2370
2371            int skip = IGNORE;
2372
2373            //
2374            //  Do the actual parsing and catch any errors.
2375            //
2376            try
2377            {
2378                skip = parseToken( ch );
2379            }
2380            catch( IllegalDataException e )
2381            {
2382                log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2383
2384                makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2385            }
2386
2387            //
2388            //   The idea is as follows:  If the handler method returns
2389            //   an element (el != null), it is assumed that it has been
2390            //   added in the stack.  Otherwise the character is added
2391            //   as is to the plaintext buffer.
2392            //
2393            //   For the transition phase, if s != null, it also gets
2394            //   added in the plaintext buffer.
2395            //
2396
2397            switch( skip )
2398            {
2399                case ELEMENT:
2400                    m_newLine = false;
2401                    break;
2402
2403                case CHARACTER:
2404                    m_plainTextBuf.append( (char) ch );
2405                    m_newLine = false;
2406                    break;
2407
2408                case IGNORE:
2409                default:
2410                    break;
2411            }
2412        }
2413
2414        closeHeadings();
2415        popElement("domroot");
2416    }
2417
2418    private String cleanupSuspectData( String s )
2419    {
2420        StringBuilder sb = new StringBuilder( s.length() );
2421
2422        for( int i = 0; i < s.length(); i++ )
2423        {
2424            char c = s.charAt(i);
2425
2426            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2427            else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2428        }
2429
2430        return sb.toString();
2431    }
2432
    /**
     *  Result of {@link #parseToken(int)}: the token is a plain character.  The
     *  parsing loop appends such a character to the plain text buffer.
     */
    protected static final int CHARACTER = 0;

    /** Result of {@link #parseToken(int)}: the token is a wikimarkup element. */
    protected static final int ELEMENT   = 1;

    /** Result of {@link #parseToken(int)}: the token is to be ignored. */
    protected static final int IGNORE    = 2;
2441
2442    /**
2443     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
2444     *  you think this was a wiki markup element, and IGNORE, if you think
2445     *  we should ignore this altogether.
2446     *  <p>
2447     *  To add your own MarkupParser, you can override this method, but it
2448     *  is recommended that you call super.parseToken() as well to gain advantage
2449     *  of JSPWiki's own markup.  You can call it at the start of your own
2450     *  parseToken() or end - it does not matter.
2451     *
2452     * @param ch The character under investigation
2453     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2454     * @throws IOException If parsing fails.
2455     */
2456    protected int parseToken( int ch )
2457        throws IOException
2458    {
2459        Element el = null;
2460
2461        //
2462        //  Now, check the incoming token.
2463        //
2464        switch( ch )
2465        {
2466          case '\r':
2467            // DOS linefeeds we forget
2468            return IGNORE;
2469
2470          case '\n':
2471            //
2472            //  Close things like headings, etc.
2473            //
2474
2475            // FIXME: This is not really very fast
2476
2477            closeHeadings();
2478
2479            popElement("dl"); // Close definition lists.
2480            if( m_istable )
2481            {
2482                popElement("tr");
2483            }
2484
2485            m_isdefinition = false;
2486
2487            if( m_newLine )
2488            {
2489                // Paragraph change.
2490                startBlockLevel();
2491
2492                //
2493                //  Figure out which elements cannot be enclosed inside
2494                //  a <p></p> pair according to XHTML rules.
2495                //
2496                String nextLine = peekAheadLine();
2497                if( nextLine.length() == 0 ||
2498                    (nextLine.length() > 0 &&
2499                     !nextLine.startsWith("{{{") &&
2500                     !nextLine.startsWith("----") &&
2501                     !nextLine.startsWith("%%") &&
2502                     "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2503                {
2504                    pushElement( new Element("p") );
2505                    m_isOpenParagraph = true;
2506
2507                    if( m_restartitalic )
2508                    {
2509                        pushElement( new Element("i") );
2510                        m_isitalic = true;
2511                        m_restartitalic = false;
2512                    }
2513                    if( m_restartbold )
2514                    {
2515                        pushElement( new Element("b") );
2516                        m_isbold = true;
2517                        m_restartbold = false;
2518                    }
2519                }
2520            }
2521            else
2522            {
2523                m_plainTextBuf.append("\n");
2524                m_newLine = true;
2525            }
2526            return IGNORE;
2527
2528
2529          case '\\':
2530            el = handleBackslash();
2531            break;
2532
2533          case '_':
2534            el = handleUnderscore();
2535            break;
2536
2537          case '\'':
2538            el = handleApostrophe();
2539            break;
2540
2541          case '{':
2542            el = handleOpenbrace( m_newLine );
2543            break;
2544
2545          case '}':
2546            el = handleClosebrace();
2547            break;
2548
2549          case '-':
2550            if( m_newLine )
2551                el = handleDash();
2552
2553            break;
2554
2555          case '!':
2556            if( m_newLine )
2557            {
2558                el = handleHeading();
2559            }
2560            break;
2561
2562          case ';':
2563            if( m_newLine )
2564            {
2565                el = handleDefinitionList();
2566            }
2567            break;
2568
2569          case ':':
2570            if( m_isdefinition )
2571            {
2572                popElement("dt");
2573                el = pushElement( new Element("dd") );
2574                m_isdefinition = false;
2575            }
2576            break;
2577
2578          case '[':
2579            el = handleOpenbracket();
2580            break;
2581
2582          case '*':
2583            if( m_newLine )
2584            {
2585                pushBack('*');
2586                el = handleGeneralList();
2587            }
2588            break;
2589
2590          case '#':
2591            if( m_newLine )
2592            {
2593                pushBack('#');
2594                el = handleGeneralList();
2595            }
2596            break;
2597
2598          case '|':
2599            el = handleBar( m_newLine );
2600            break;
2601
2602          case '~':
2603            el = handleTilde();
2604            break;
2605
2606          case '%':
2607            el = handleDiv( m_newLine );
2608            break;
2609
2610          case '/':
2611            el = handleSlash( m_newLine );
2612            break;
2613
2614          default:
2615            break;
2616        }
2617
2618        return el != null ? ELEMENT : CHARACTER;
2619    }
2620
2621    private void closeHeadings()
2622    {
2623        if( m_lastHeading != null && !m_wysiwygEditorMode )
2624        {
2625            // Add the hash anchor element at the end of the heading
2626            addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2627            m_lastHeading = null;
2628        }
2629        popElement("h2");
2630        popElement("h3");
2631        popElement("h4");
2632    }
2633
2634    /**
2635     *  Parses the entire document from the Reader given in the constructor or
2636     *  set by {@link #setInputReader(Reader)}.
2637     *
2638     *  @return A WikiDocument, ready to be passed to the renderer.
2639     *  @throws IOException If parsing cannot be accomplished.
2640     */
2641    public WikiDocument parse()
2642        throws IOException
2643    {
2644        WikiDocument d = new WikiDocument( m_context.getPage() );
2645        d.setContext( m_context );
2646
2647        Element rootElement = new Element("domroot");
2648
2649        d.setRootElement( rootElement );
2650
2651        fillBuffer( rootElement );
2652
2653        paragraphify(rootElement);
2654
2655        return d;
2656    }
2657
2658    /**
2659     *  Checks out that the first paragraph is correctly installed.
2660     *
2661     *  @param rootElement
2662     */
2663    private void paragraphify(Element rootElement)
2664    {
2665        //
2666        //  Add the paragraph tag to the first paragraph
2667        //
2668        List< Content > kids = rootElement.getContent();
2669
2670        if( rootElement.getChild("p") != null )
2671        {
2672            ArrayList<Content> ls = new ArrayList<Content>();
2673            int idxOfFirstContent = 0;
2674            int count = 0;
2675
2676            for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ )
2677            {
2678                Content c = i.next();
2679                if( c instanceof Element )
2680                {
2681                    String name = ( ( Element )c ).getName();
2682                    if( isBlockLevel( name ) ) break;
2683                }
2684
2685                if( !(c instanceof ProcessingInstruction) )
2686                {
2687                    ls.add( c );
2688                    if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2689                }
2690            }
2691
2692            //
2693            //  If there were any elements, then add a new <p> (unless it would
2694            //  be an empty one)
2695            //
2696            if( ls.size() > 0 )
2697            {
2698                Element newel = new Element("p");
2699
2700                for( Iterator< Content > i = ls.iterator(); i.hasNext(); )
2701                {
2702                    Content c = i.next();
2703
2704                    c.detach();
2705                    newel.addContent(c);
2706                }
2707
2708                //
2709                // Make sure there are no empty <p/> tags added.
2710                //
2711                if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2712                    rootElement.addContent(idxOfFirstContent, newel);
2713            }
2714        }
2715    }
2716
2717}