Source code

001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import java.io.IOException;
022import java.io.Reader;
023import java.io.StringReader;
024import java.text.MessageFormat;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.EmptyStackException;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Map;
033import java.util.Properties;
034import java.util.ResourceBundle;
035import java.util.Stack;
036
037import javax.xml.transform.Result;
038
039import org.apache.commons.lang.StringEscapeUtils;
040import org.apache.commons.lang.StringUtils;
041import org.apache.log4j.Logger;
042import org.apache.oro.text.regex.MalformedPatternException;
043import org.apache.oro.text.regex.MatchResult;
044import org.apache.oro.text.regex.Pattern;
045import org.apache.oro.text.regex.PatternCompiler;
046import org.apache.oro.text.regex.PatternMatcher;
047import org.apache.oro.text.regex.Perl5Compiler;
048import org.apache.oro.text.regex.Perl5Matcher;
049import org.apache.wiki.InternalWikiException;
050import org.apache.wiki.StringTransmutator;
051import org.apache.wiki.WikiContext;
052import org.apache.wiki.WikiPage;
053import org.apache.wiki.api.exceptions.PluginException;
054import org.apache.wiki.api.plugin.WikiPlugin;
055import org.apache.wiki.auth.WikiSecurityException;
056import org.apache.wiki.auth.acl.Acl;
057import org.apache.wiki.i18n.InternationalizationManager;
058import org.apache.wiki.preferences.Preferences;
059import org.apache.wiki.render.CleanTextRenderer;
060import org.apache.wiki.render.RenderingManager;
061import org.apache.wiki.util.TextUtil;
062import org.jdom2.Attribute;
063import org.jdom2.Content;
064import org.jdom2.Element;
065import org.jdom2.IllegalDataException;
066import org.jdom2.ProcessingInstruction;
067import org.jdom2.Verifier;
068
069/**
070 *  Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the
071 *  heart and soul of JSPWiki : make sure you test properly anything that is added,
072 *  or else it breaks down horribly.
073 *
074 *  @since  2.4
075 */
076public class JSPWikiMarkupParser extends MarkupParser {
077
    //
    //  Link type codes.  They are passed as the "type" argument of makeLink()
    //  and createAnchor().  createAnchor() uses the value as an index into
    //  CLASS_TYPES, so the numbering has to stay in step with that table.
    //
    protected static final int              READ          = 0;  // anchor to the VIEW URL of a page
    protected static final int              EDIT          = 1;  // anchor to the EDIT URL of a page, with a "create" title
    protected static final int              EMPTY         = 2;  // Empty message
    protected static final int              LOCAL         = 3;  // named footnote anchor
    protected static final int              LOCALREF      = 4;  // reference pointing at a footnote anchor
    protected static final int              IMAGE         = 5;  // inline image
    protected static final int              EXTERNAL      = 6;  // anchor whose href is the link as given
    protected static final int              INTERWIKI     = 7;  // anchor built like EXTERNAL, without rel="nofollow"
    protected static final int              IMAGELINK     = 8;  // inline image wrapped in an anchor whose href is the link text
    protected static final int              IMAGEWIKILINK = 9;  // inline image wrapped in an anchor to the page named by the link text
    protected static final int              ATTACHMENT    = 10; // attachment anchor, optionally followed by an info link

    /** Class-wide log4j logger. */
    private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );
091
    //
    //  Parse-state flags.
    //  NOTE(review): presumably toggled by the markup handlers further down
    //  in this class - confirm.  m_isEscaping is read by flushPlainText(),
    //  where it switches off CamelCase/URI auto-linking.
    //
    private boolean        m_isbold       = false;
    private boolean        m_isitalic     = false;
    private boolean        m_istable      = false;
    private boolean        m_isPre        = false;
    private boolean        m_isEscaping   = false;
    private boolean        m_isdefinition = false;
    private boolean        m_isPreBlock   = false;

    /** Contains style information, in multiple forms. */
    private Stack<Boolean> m_styleStack   = new Stack<>();

     // general list handling
    private int            m_genlistlevel = 0;
    private StringBuilder  m_genlistBulletBuffer = new StringBuilder(10);  // stores the # and * pattern
    private boolean        m_allowPHPWikiStyleLists = true;

    // NOTE(review): presumably true while a paragraph element is open - confirm.
    private boolean        m_isOpenParagraph = false;

    /** Parser for extended link functionality. */
    private LinkParser     m_linkParser = new LinkParser();

    /**
     *  Keeps track of any plain text that gets put in the Text nodes.
     *  flushPlainText() moves the content into the current element and
     *  replaces this buffer with a fresh one.
     */
    private StringBuilder  m_plainTextBuf = new StringBuilder(20);

    /**
     *  The element that new content is appended to.  pushElement() descends
     *  into a new child, popElement() moves back up to a parent.
     */
    private Element        m_currentElement;

    /**
     *  Keep track of duplicate header names.  Maps an encoded heading title
     *  to the number of times it has been seen so far.
     */
    private Map<String, Integer>   m_titleSectionCounter = new HashMap<>();

    /** If true, consider CamelCase hyperlinks as well. */
    public static final String     PROP_CAMELCASELINKS   = "jspwiki.translatorReader.camelCaseLinks";

    /** If true, all hyperlinks are translated as well, regardless whether they
        are surrounded by brackets. */
    public static final String     PROP_PLAINURIS        = "jspwiki.translatorReader.plainUris";

    /** If true, all outward attachment info links have a small link image appended. */
    public static final String     PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

    /**
     *  If true, then considers CamelCase links as well.  Set in initialize()
     *  from the page attribute PROP_CAMELCASELINKS when present, otherwise
     *  from the wiki property of the same name.
     */
    private boolean                m_camelCaseLinks      = false;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean                m_wysiwygEditorMode     = false;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean                m_plainUris           = false;

    /** If true, all outward links use a small link image. */
    private boolean                m_useOutlinkImage     = true;

    /**
     *  If true, makeLink() follows an attachment link with an extra
     *  "infolink" anchor that contains a small image.
     */
    private boolean                m_useAttachmentImage  = true;

    /** If true, allows raw HTML. */
    private boolean                m_allowHTML           = false;

    /** If true, makeLink() adds rel="nofollow" to EXTERNAL anchors. */
    private boolean                m_useRelNofollow      = false;

    /** Compiler used by initialize() to compile WIKIWORD_REGEX. */
    private PatternCompiler        m_compiler = new Perl5Compiler();

    /**
     *  Pattern for CamelCase words and bare URIs.  As consumed by
     *  flushPlainText(): group 1 is the non-alphanumeric prefix, group 2 the
     *  whole matched word (a WikiWord or the complete URI), group 3 the URI
     *  scheme and group 4 the remainder of the URI.
     */
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    /** Matcher that flushPlainText() runs against m_camelCasePattern. */
    private PatternMatcher         m_camelCaseMatcher = new Perl5Matcher();

    /** Compiled form of WIKIWORD_REGEX; obtained in initialize(). */
    private Pattern                m_camelCasePattern;

    // NOTE(review): presumably the current table row number - confirm against the table handling code.
    private int                    m_rowNum              = 1;

    // NOTE(review): presumably the heading most recently parsed - confirm.
    private Heading                m_lastHeading         = null;

    /** Engine attribute name under which initialize() caches the compiled CamelCase pattern. */
    private static final String CAMELCASE_PATTERN     = "JSPWikiMarkupParser.camelCasePattern";
163
    /**
     *  Creates a markup parser and configures it by calling
     *  {@link #initialize()} once the superclass has been set up.
     *
     *  @param context The WikiContext which controls the parsing
     *  @param in Where the data is read from.
     */
    public JSPWikiMarkupParser( WikiContext context, Reader in )
    {
        super( context, in );
        initialize();
    }
175
176    // FIXME: parsers should be pooled for better performance.
177    private void initialize()
178    {
179        initInlineImagePatterns();
180
181        m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN );
182        if( m_camelCasePattern == null )
183        {
184            try
185            {
186                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,
187                                                         Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
188            }
189            catch( MalformedPatternException e )
190            {
191                log.fatal("Internal error: Someone put in a faulty pattern.",e);
192                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
193            }
194            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
195        }
196        //
197        //  Set the properties.
198        //
199        Properties props      = m_engine.getWikiProperties();
200
201        String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
202
203        if( cclinks != null )
204        {
205            m_camelCaseLinks = TextUtil.isPositive( cclinks );
206        }
207        else
208        {
209            m_camelCaseLinks  = TextUtil.getBooleanProperty( props,
210                                                             PROP_CAMELCASELINKS,
211                                                             m_camelCaseLinks );
212        }
213
214        Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE );
215        if( wysiwygVariable != null )
216        {
217            m_wysiwygEditorMode = wysiwygVariable.booleanValue();
218        }
219
220        m_plainUris           = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
221        m_useOutlinkImage     = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
222        m_useAttachmentImage  = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
223        m_allowHTML           = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
224        m_useRelNofollow      = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
225
226        if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null )
227        {
228            disableAccessRules();
229        }
230
231        m_context.getPage().setHasMetadata();
232    }
233
234    /**
235     *  Calls a transmutator chain.
236     *
237     *  @param list Chain to call
238     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
239     *  @return The result of the mutation.
240     */
241    protected String callMutatorChain( Collection< StringTransmutator > list, String text )
242    {
243        if( list == null || list.size() == 0 )
244        {
245            return text;
246        }
247
248        for( Iterator< StringTransmutator > i = list.iterator(); i.hasNext(); )
249        {
250            StringTransmutator m = i.next();
251
252            text = m.mutate( m_context, text );
253        }
254
255        return text;
256    }
257
258    /**
259     * Calls the heading listeners.
260     *
261     * @param param A Heading object.
262     */
263    protected void callHeadingListenerChain( Heading param )
264    {
265        List< HeadingListener > list = m_headingListenerChain;
266
267        for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); )
268        {
269            HeadingListener h = i.next();
270
271            h.headingAdded( m_context, param );
272        }
273    }
274
275    /**
276     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
277     *  if you really know what you are doing.
278     *
279     *  @param type One of the types above
280     *  @param link URL to which to link to
281     *  @param text Link text
282     *  @param section If a particular section identifier is required.
283     *  @return An A element.
284     *  @since 2.4.78
285     */
286    protected Element createAnchor(int type, String link, String text, String section)
287    {
288        text = escapeHTMLEntities( text );
289        section = escapeHTMLEntities( section );
290        Element el = new Element("a");
291        el.setAttribute("class",CLASS_TYPES[type]);
292        el.setAttribute("href",link+section);
293        el.addContent(text);
294        return el;
295    }
296
297    private Element makeLink( int type, String link, String text, String section, Iterator< Attribute > attributes )
298    {
299        Element el = null;
300
301        if( text == null ) text = link;
302
303        text = callMutatorChain( m_linkMutators, text );
304
305        section = (section != null) ? ("#"+section) : "";
306
307        // Make sure we make a link name that can be accepted
308        // as a valid URL.
309
310        if( link.length() == 0 )
311        {
312            type = EMPTY;
313        }
314        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
315
316        switch(type)
317        {
318            case READ:
319                el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
320                break;
321
322            case EDIT:
323                el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
324                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
325
326                break;
327
328            case EMPTY:
329                el = new Element("u").addContent(text);
330                break;
331
332                //
333                //  These two are for local references - footnotes and
334                //  references to footnotes.
335                //  We embed the page name (or whatever WikiContext gives us)
336                //  to make sure the links are unique across Wiki.
337                //
338            case LOCALREF:
339                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
340                break;
341
342            case LOCAL:
343                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
344                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
345                el.addContent("["+text+"]");
346                break;
347
348                //
349                //  With the image, external and interwiki types we need to
350                //  make sure nobody can put in Javascript or something else
351                //  annoying into the links themselves.  We do this by preventing
352                //  a haxor from stopping the link name short with quotes in
353                //  fillBuffer().
354                //
355            case IMAGE:
356                el = new Element("img").setAttribute("class","inline");
357                el.setAttribute("src",link);
358                el.setAttribute("alt",text);
359                break;
360
361            case IMAGELINK:
362                el = new Element("img").setAttribute("class","inline");
363                el.setAttribute("src",link);
364                el.setAttribute("alt",text);
365                el = createAnchor(IMAGELINK,text,"","").addContent(el);
366                break;
367
368            case IMAGEWIKILINK:
369                String pagelink = m_context.getURL(WikiContext.VIEW,text);
370                el = new Element("img").setAttribute("class","inline");
371                el.setAttribute("src",link);
372                el.setAttribute("alt",text);
373                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
374                break;
375
376            case EXTERNAL:
377                el = createAnchor( EXTERNAL, link, text, section );
378                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
379                break;
380
381            case INTERWIKI:
382                el = createAnchor( INTERWIKI, link, text, section );
383                break;
384
385            case ATTACHMENT:
386                String attlink = m_context.getURL( WikiContext.ATTACH,
387                                                   link );
388
389                String infolink = m_context.getURL( WikiContext.INFO,
390                                                    link );
391
392                String imglink = m_context.getURL( WikiContext.NONE,
393                                                   "images/attachment_small.png" );
394
395                el = createAnchor( ATTACHMENT, attlink, text, "" );
396
397                pushElement(el);
398                popElement(el.getName());
399
400                if( m_useAttachmentImage )
401                {
402                    el = new Element("img").setAttribute("src",imglink);
403                    el.setAttribute("border","0");
404                    el.setAttribute("alt","(info)");
405
406                    el = new Element("a").setAttribute("href",infolink).addContent(el);
407                    el.setAttribute("class","infolink");
408                }
409                else
410                {
411                    el = null;
412                }
413                break;
414
415            default:
416                break;
417        }
418
419        if( el != null && attributes != null )
420        {
421            while( attributes.hasNext() )
422            {
423                Attribute attr = attributes.next();
424                if( attr != null )
425                {
426                    el.setAttribute(attr);
427                }
428            }
429        }
430
431        if( el != null )
432        {
433            flushPlainText();
434            m_currentElement.addContent( el );
435        }
436        return el;
437    }
438
439    /**
440     *  Figures out if a link is an off-site link.  This recognizes
441     *  the most common protocols by checking how it starts.
442     *
443     *  @param link The link to check.
444     *  @return true, if this is a link outside of this wiki.
445     *  @since 2.4
446     *  @deprecated - use {@link LinkParsingOperations#isExternalLink(String)} instead.
447     */
448    @Deprecated
449    public static boolean isExternalLink( String link )
450    {
451        return new LinkParsingOperations(null).isExternalLink( link );
452    }
453
454    /**
455     *  Returns true if the link is really command to insert
456     *  a plugin.
457     *  <P>
458     *  Currently we just check if the link starts with "{INSERT",
459     *  or just plain "{" but not "{$".
460     *
461     *  @param link Link text, i.e. the contents of text between [].
462     *  @return True, if this link seems to be a command to insert a plugin here.
463     *  @deprecated Use {@link LinkParsingOperations#isPluginLink(String)} instead,
464     */
465    @Deprecated
466    public static boolean isPluginLink( String link )
467    {
468        return new LinkParsingOperations( null ).isPluginLink( link );
469    }
470
471    /**
472     *  These are all of the HTML 4.01 block-level elements.
473     */
474    private static final String[] BLOCK_ELEMENTS = {
475        "address", "blockquote", "div", "dl", "fieldset", "form",
476        "h1", "h2", "h3", "h4", "h5", "h6",
477        "hr", "noscript", "ol", "p", "pre", "table", "ul"
478    };
479
480    private static boolean isBlockLevel( String name )
481    {
482        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
483    }
484
485    /**
486     *  This method peeks ahead in the stream until EOL and returns the result.
487     *  It will keep the buffers untouched.
488     *
489     *  @return The string from the current position to the end of line.
490     */
491
492    // FIXME: Always returns an empty line, even if the stream is full.
493    private String peekAheadLine()
494        throws IOException
495    {
496        String s = readUntilEOL().toString();
497
498        if( s.length() > PUSHBACK_BUFFER_SIZE )
499        {
500            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
501            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
502        }
503        else
504        {
505            try
506            {
507                pushBack( s );
508            }
509            catch( IOException e )
510            {
511                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
512            }
513        }
514        return s;
515    }
516
    /**
     *  Moves the buffered plain text into the current element and empties
     *  the buffer.  Unless raw HTML is allowed, the text is entity-escaped
     *  first.  When CamelCase linking is on, the parser is not escaping and
     *  the text is longer than three characters, every WikiWord or bare URI
     *  found in the text is turned into a link, unless it is preceded by
     *  a '~' or its prefix contains a '['.
     *
     *  @return The number of characters that were in the buffer when the
     *          method was called (counted before any escaping).
     */
    private int flushPlainText()
    {
        int numChars = m_plainTextBuf.length();

        if( numChars > 0 )
        {
            String buf;

            if( !m_allowHTML )
            {
                buf = escapeHTMLEntities(m_plainTextBuf.toString());
            }
            else
            {
                buf = m_plainTextBuf.toString();
            }
            //
            //  We must first empty the buffer because the side effect of
            //  calling makeCamelCaseLink() is to call this routine.
            //

            m_plainTextBuf = new StringBuilder(20);

            try
            {
                //
                //  This is the heaviest part of parsing, and therefore we can
                //  do some optimization here.
                //
                //  1) Only when the length of the buffer is big enough, we try to do the match
                //

                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
                {
                    //  Each pass consumes the text up to the end of one match;
                    //  buf always holds the part that is still unprocessed.
                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
                    {
                        MatchResult result = m_camelCaseMatcher.getMatch();

                        //  Plain text in front of the match.
                        String firstPart = buf.substring(0,result.beginOffset(0));
                        String prefix = result.group(1);

                        if( prefix == null ) prefix = "";

                        //  Group 2 is the whole matched word (for a URI match it is the
                        //  complete URI); groups 3 and 4 are only set for a URI match.
                        String camelCase = result.group(2);
                        String protocol  = result.group(3);
                        //  Only meaningful when protocol is not null.
                        String uri       = protocol+result.group(4);
                        buf              = buf.substring(result.endOffset(0));

                        m_currentElement.addContent( firstPart );

                        //
                        //  Check if the user does not wish to do URL or WikiWord expansion
                        //
                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
                        {
                            if( prefix.endsWith("~") )
                            {
                                //  The escaping '~' is dropped from the output, except in
                                //  wysiwyg editor mode where it is kept.
                                if( m_wysiwygEditorMode )
                                {
                                    m_currentElement.addContent( "~" );
                                }
                                prefix = prefix.substring(0,prefix.length()-1);
                            }
                            if( camelCase != null )
                            {
                                m_currentElement.addContent( prefix+camelCase );
                            }
                            else if( protocol != null )
                            {
                                m_currentElement.addContent( prefix+uri );
                            }
                            continue;
                        }

                        //
                        //  Fine, then let's check what kind of a link this was
                        //  and emit the proper elements
                        //
                        if( protocol != null )
                        {
                            //  A trailing '.' or ',' is not treated as part of the URI;
                            //  it is handed back to the unprocessed text.
                            char c = uri.charAt(uri.length()-1);
                            if( c == '.' || c == ',' )
                            {
                                uri = uri.substring(0,uri.length()-1);
                                buf = c + buf;
                            }
                            m_currentElement.addContent( prefix );
                            makeDirectURILink( uri );
                        }
                        else
                        {
                            m_currentElement.addContent( prefix );

                            makeCamelCaseLink( camelCase );
                        }
                    }

                    //  Whatever is left after the last match.
                    m_currentElement.addContent( buf );
                }
                else
                {
                    //
                    //  No camelcase asked for, just add the elements
                    //
                    m_currentElement.addContent( buf );
                }
            }
            catch( IllegalDataException e )
            {
                //
                // Sometimes it's possible that illegal XML chars is added to the data.
                // Here we make sure it does not stop parsing.
                //
                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
            }
        }

        return numChars;
    }
642
643    /**
644     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
645     *  entities that are already escaped).
646     *
647     *  @param buf
648     *  @return An escaped string.
649     */
650    private String escapeHTMLEntities(String buf)
651    {
652        StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
653
654        for( int i = 0; i < buf.length(); i++ )
655        {
656            char ch = buf.charAt(i);
657
658            if( ch == '<' )
659            {
660                tmpBuf.append("&lt;");
661            }
662            else if( ch == '>' )
663            {
664                tmpBuf.append("&gt;");
665            }
666            else if( ch == '\"' )
667            {
668                tmpBuf.append("&quot;");
669            }
670            else if( ch == '&' )
671            {
672                //
673                //  If the following is an XML entity reference (&#.*;) we'll
674                //  leave it as it is; otherwise we'll replace it with an &amp;
675                //
676
677                boolean isEntity = false;
678                StringBuilder entityBuf = new StringBuilder();
679
680                if( i < buf.length() -1 )
681                {
682                    for( int j = i; j < buf.length(); j++ )
683                    {
684                        char ch2 = buf.charAt(j);
685
686                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' )
687                        {
688                            entityBuf.append(ch2);
689
690                            if( ch2 == ';' )
691                            {
692                                isEntity = true;
693                                break;
694                            }
695                        }
696                        else
697                        {
698                            break;
699                        }
700                    }
701                }
702
703                if( isEntity )
704                {
705                    tmpBuf.append( entityBuf );
706                    i = i + entityBuf.length() - 1;
707                }
708                else
709                {
710                    tmpBuf.append("&amp;");
711                }
712
713            }
714            else
715            {
716                tmpBuf.append( ch );
717            }
718        }
719
720        return tmpBuf.toString();
721    }
722
723    private Element pushElement( Element e )
724    {
725        flushPlainText();
726        m_currentElement.addContent( e );
727        m_currentElement = e;
728
729        return e;
730    }
731
732    private Element addElement( Content e )
733    {
734        if( e != null )
735        {
736            flushPlainText();
737            m_currentElement.addContent( e );
738        }
739        return m_currentElement;
740    }
741
742    /**
743     *  All elements that can be empty by the HTML DTD.
744     */
745    //  Keep sorted.
746    private static final String[] EMPTY_ELEMENTS = {
747        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
748    };
749
750    /**
751     *  Goes through the current element stack and pops all elements until this
752     *  element is found - this essentially "closes" and element.
753     *
754     *  @param s
755     *  @return The new current element, or null, if there was no such element in the entire stack.
756     */
757    private Element popElement( String s )
758    {
759        int flushedBytes = flushPlainText();
760
761        Element currEl = m_currentElement;
762
763        while( currEl.getParentElement() != null )
764        {
765            if( currEl.getName().equals(s) && !currEl.isRootElement() )
766            {
767                m_currentElement = currEl.getParentElement();
768
769                //
770                //  Check if it's okay for this element to be empty.  Then we will
771                //  trick the JDOM generator into not generating an empty element,
772                //  by putting an empty string between the tags.  Yes, it's a kludge
773                //  but what'cha gonna do about it. :-)
774                //
775
776                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 )
777                {
778                    currEl.addContent("");
779                }
780
781                return m_currentElement;
782            }
783
784            currEl = currEl.getParentElement();
785        }
786
787        return null;
788    }
789
790
791    /**
792     *  Reads the stream until it meets one of the specified
793     *  ending characters, or stream end.  The ending character will be left
794     *  in the stream.
795     */
796    private String readUntil( String endChars )
797        throws IOException
798    {
799        StringBuilder sb = new StringBuilder( 80 );
800        int ch = nextToken();
801
802        while( ch != -1 )
803        {
804            if( ch == '\\' )
805            {
806                ch = nextToken();
807                if( ch == -1 )
808                {
809                    break;
810                }
811            }
812            else
813            {
814                if( endChars.indexOf((char)ch) != -1 )
815                {
816                    pushBack( ch );
817                    break;
818                }
819            }
820            sb.append( (char) ch );
821            ch = nextToken();
822        }
823
824        return sb.toString();
825    }
826
827    /**
828     *  Reads the stream while the characters that have been specified are
829     *  in the stream, returning then the result as a String.
830     */
831    private String readWhile( String endChars )
832        throws IOException
833    {
834        StringBuilder sb = new StringBuilder( 80 );
835        int ch = nextToken();
836
837        while( ch != -1 )
838        {
839            if( endChars.indexOf((char)ch) == -1 )
840            {
841                pushBack( ch );
842                break;
843            }
844
845            sb.append( (char) ch );
846            ch = nextToken();
847        }
848
849        return sb.toString();
850    }
851
852    private JSPWikiMarkupParser m_cleanTranslator;
853
854    /**
855     *  Does a lazy init.  Otherwise, we would get into a situation
856     *  where HTMLRenderer would try and boot a TranslatorReader before
857     *  the TranslatorReader it is contained by is up.
858     */
859    private JSPWikiMarkupParser getCleanTranslator()
860    {
861        if( m_cleanTranslator == null )
862        {
863            WikiContext dummyContext = new WikiContext( m_engine,
864                                                        m_context.getHttpRequest(),
865                                                        m_context.getPage() );
866            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
867
868            m_cleanTranslator.m_allowHTML = true;
869        }
870
871        return m_cleanTranslator;
872    }
873    /**
874     *  Modifies the "hd" parameter to contain proper values.  Because
875     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
876     *  % after url encoding with '_'.
877     *  <p>
878     *  Counts also duplicate headings (= headings with similar name), and
879     *  attaches a counter.
880     */
881    private String makeHeadingAnchor( String baseName, String title, Heading hd )
882    {
883        hd.m_titleText = title;
884        title = MarkupParser.wikifyLink( title );
885
886        hd.m_titleSection = m_engine.encodeName(title);
887
888        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) )
889        {
890            Integer count = m_titleSectionCounter.get( hd.m_titleSection );
891            count = count + 1;
892            m_titleSectionCounter.put( hd.m_titleSection, count );
893            hd.m_titleSection += "-" + count;
894        }
895        else
896        {
897            m_titleSectionCounter.put( hd.m_titleSection, 1 );
898        }
899
900        hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+
901                           "-"+hd.m_titleSection;
902        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
903        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
904
905        return hd.m_titleAnchor;
906    }
907
908    private String makeSectionTitle( String title )
909    {
910        title = title.trim();
911        String outTitle;
912
913        try
914        {
915            JSPWikiMarkupParser dtr = getCleanTranslator();
916            dtr.setInputReader( new StringReader(title) );
917
918            CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse());
919
920            outTitle = ctt.getString();
921        }
922        catch( IOException e )
923        {
924            log.fatal("CleanTranslator not working", e);
925            throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e);
926        }
927
928        return outTitle;
929    }
930
931    /**
932     *  Returns XHTML for the heading.
933     *
934     *  @param level The level of the heading.  @see Heading
935     *  @param title the title for the heading
936     *  @param hd a List to which heading should be added
937     *  @return An Element containing the heading
938     */
939    public Element makeHeading( int level, String title, Heading hd )
940    {
941        Element el = null;
942
943        String pageName = m_context.getPage().getName();
944
945        String outTitle = makeSectionTitle( title );
946
947        hd.m_level = level;
948
949        switch( level )
950        {
951          case Heading.HEADING_SMALL:
952            el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
953            break;
954
955          case Heading.HEADING_MEDIUM:
956            el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
957            break;
958
959          case Heading.HEADING_LARGE:
960            el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ));
961            break;
962
963          default:
964            throw new InternalWikiException("Illegal heading type "+level);
965        }
966
967
968        return el;
969    }
970
971    /**
972     *  When given a link to a WikiName, we just return
973     *  a proper HTML link for it.  The local link mutator
974     *  chain is also called.
975     */
976    private Element makeCamelCaseLink( String wikiname )
977    {
978        String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
979
980        callMutatorChain( m_localLinkMutatorChain, wikiname );
981
982        if( matchedLink != null ) {
983            makeLink( READ, matchedLink, wikiname, null, null );
984        } else {
985            makeLink( EDIT, wikiname, wikiname, null, null );
986        }
987
988        return m_currentElement;
989    }
990
991    /** Holds the image URL for the duration of this parser */
992    private String m_outlinkImageURL = null;
993
994    /**
995     *  Returns an element for the external link image (out.png).  However,
996     *  this method caches the URL for the lifetime of this MarkupParser,
997     *  because it's commonly used, and we'll end up with possibly hundreds
998     *  our thousands of references to it...  It's a lot faster, too.
999     *
1000     *  @return  An element containing the HTML for the outlink image.
1001     */
1002    private Element outlinkImage()
1003    {
1004        Element el = null;
1005
1006        if( m_useOutlinkImage )
1007        {
1008            if( m_outlinkImageURL == null )
1009            {
1010                m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE );
1011            }
1012
1013            el = new Element( "img" ).setAttribute( "class", OUTLINK );
1014            el.setAttribute( "src", m_outlinkImageURL );
1015            el.setAttribute( "alt","" );
1016        }
1017
1018        return el;
1019    }
1020
1021    /**
1022     *  Takes an URL and turns it into a regular wiki link.  Unfortunately,
1023     *  because of the way that flushPlainText() works, it already encodes
1024     *  all of the XML entities.  But so does WikiContext.getURL(), so we
1025     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
1026     *  <p>
1027     *  What a crappy problem.
1028     *
1029     * @param url
1030     * @return An anchor Element containing the link.
1031     */
1032    private Element makeDirectURILink( String url )
1033    {
1034        Element result;
1035        String last = null;
1036
1037        if( url.endsWith(",") || url.endsWith(".") )
1038        {
1039            last = url.substring( url.length()-1 );
1040            url  = url.substring( 0, url.length()-1 );
1041        }
1042
1043        callMutatorChain( m_externalLinkMutatorChain, url );
1044
1045        if( m_linkParsingOperations.isImageLink( url ) )
1046        {
1047            result = handleImageLink( StringUtils.replace(url,"&amp;","&"), url, false );
1048        }
1049        else
1050        {
1051            result = makeLink( EXTERNAL, StringUtils.replace(url,"&amp;","&"), url, null, null );
1052            addElement( outlinkImage() );
1053        }
1054
1055        if( last != null )
1056        {
1057            m_plainTextBuf.append(last);
1058        }
1059
1060        return result;
1061    }
1062
1063    /**
1064     *  Image links are handled differently:
1065     *  1. If the text is a WikiName of an existing page,
1066     *     it gets linked.
1067     *  2. If the text is an external link, then it is inlined.
1068     *  3. Otherwise it becomes an ALT text.
1069     *
1070     *  @param reallink The link to the image.
1071     *  @param link     Link text portion, may be a link to somewhere else.
1072     *  @param hasLinkText If true, then the defined link had a link text available.
1073     *                  This means that the link text may be a link to a wiki page,
1074     *                  or an external resource.
1075     */
1076
1077    // FIXME: isExternalLink() is called twice.
1078    private Element handleImageLink( String reallink, String link, boolean hasLinkText )
1079    {
1080        String possiblePage = MarkupParser.cleanLink( link );
1081
1082        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText )
1083        {
1084            return makeLink( IMAGELINK, reallink, link, null, null );
1085        }
1086        else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText )
1087        {
1088            // System.out.println("Orig="+link+", Matched: "+matchedLink);
1089            callMutatorChain( m_localLinkMutatorChain, possiblePage );
1090
1091            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
1092        }
1093        else
1094        {
1095            return makeLink( IMAGE, reallink, link, null, null );
1096        }
1097    }
1098
1099    private Element handleAccessRule( String ruleLine )
1100    {
1101        if( m_wysiwygEditorMode )
1102        {
1103            m_currentElement.addContent( "[" + ruleLine + "]" );
1104        }
1105
1106        if( !m_parseAccessRules ) return m_currentElement;
1107        Acl acl;
1108        WikiPage          page = m_context.getRealPage();
1109        // UserDatabase      db = m_context.getEngine().getUserDatabase();
1110
1111        if( ruleLine.startsWith( "{" ) )
1112            ruleLine = ruleLine.substring( 1 );
1113        if( ruleLine.endsWith( "}" ) )
1114            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
1115
1116        if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine);
1117
1118        try
1119        {
1120            acl = m_engine.getAclManager().parseAcl( page, ruleLine );
1121
1122            page.setAcl( acl );
1123
1124            if( log.isDebugEnabled() ) log.debug( acl.toString() );
1125        }
1126        catch( WikiSecurityException wse )
1127        {
1128            return makeError( wse.getMessage() );
1129        }
1130
1131        return m_currentElement;
1132    }
1133
1134    /**
1135     *  Handles metadata setting [{SET foo=bar}]
1136     */
1137    private Element handleMetadata( String link )
1138    {
1139        if( m_wysiwygEditorMode )
1140        {
1141            m_currentElement.addContent( "[" + link + "]" );
1142        }
1143
1144        try
1145        {
1146            String args = link.substring( link.indexOf(' '), link.length()-1 );
1147
1148            String name = args.substring( 0, args.indexOf('=') );
1149            String val  = args.substring( args.indexOf('=')+1, args.length() );
1150
1151            name = name.trim();
1152            val  = val.trim();
1153
1154            if( val.startsWith("'") ) val = val.substring( 1 );
1155            if( val.endsWith("'") )   val = val.substring( 0, val.length()-1 );
1156
1157            // log.debug("SET name='"+name+"', value='"+val+"'.");
1158
1159            if( name.length() > 0 && val.length() > 0 )
1160            {
1161                val = m_engine.getVariableManager().expandVariables( m_context,
1162                                                                     val );
1163
1164                m_context.getPage().setAttribute( name, val );
1165            }
1166        }
1167        catch( Exception e )
1168        {
1169            ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1170            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
1171        }
1172
1173        return m_currentElement;
1174    }
1175
1176    /**
1177     *  Emits a processing instruction that will disable markup escaping. This is
1178     *  very useful if you want to emit HTML directly into the stream.
1179     *
1180     */
1181    private void disableOutputEscaping()
1182    {
1183        addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") );
1184    }
1185
1186    /**
1187     *  Gobbles up all hyperlinks that are encased in square brackets.
1188     */
1189    private Element handleHyperlinks( String linktext, int pos )
1190    {
1191        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1192
1193        StringBuilder sb = new StringBuilder(linktext.length()+80);
1194
1195        if( m_linkParsingOperations.isAccessRule( linktext ) )
1196        {
1197            return handleAccessRule( linktext );
1198        }
1199
1200        if( m_linkParsingOperations.isMetadata( linktext ) )
1201        {
1202            return handleMetadata( linktext );
1203        }
1204
1205        if( m_linkParsingOperations.isPluginLink( linktext ) )
1206        {
1207            try
1208            {
1209                PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
1210                //
1211                //  This might sometimes fail, especially if there is something which looks
1212                //  like a plugin invocation but is really not.
1213                //
1214                if( pluginContent != null )
1215                {
1216                    addElement( pluginContent );
1217
1218                    pluginContent.executeParse( m_context );
1219                }
1220            }
1221            catch( PluginException e )
1222            {
1223                log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
1224                //log.info( "Root cause:",e.getRootThrowable() );
1225                if( !m_wysiwygEditorMode )
1226                {
1227                    ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE );
1228                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
1229                                                                        m_context.getRealPage().getWiki(),
1230                                                                        m_context.getRealPage().getName(),
1231                                                                        e.getMessage() ) ) );
1232                }
1233            }
1234
1235            return m_currentElement;
1236        }
1237
1238        try
1239        {
1240            LinkParser.Link link = m_linkParser.parse(linktext);
1241            linktext       = link.getText();
1242            String linkref = link.getReference();
1243
1244            //
1245            //  Yes, we now have the components separated.
1246            //  linktext = the text the link should have
1247            //  linkref  = the url or page name.
1248            //
1249            //  In many cases these are the same.  [linktext|linkref].
1250            //
1251            if( m_linkParsingOperations.isVariableLink( linktext ) )
1252            {
1253                Content el = new VariableContent(linktext);
1254
1255                addElement( el );
1256            }
1257            else if( m_linkParsingOperations.isExternalLink( linkref ) )
1258            {
1259                // It's an external link, out of this Wiki
1260
1261                callMutatorChain( m_externalLinkMutatorChain, linkref );
1262
1263                if( m_linkParsingOperations.isImageLink( linkref ) )
1264                {
1265                    handleImageLink( linkref, linktext, link.hasReference() );
1266                }
1267                else
1268                {
1269                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
1270                    addElement( outlinkImage() );
1271                }
1272            }
1273            else if( link.isInterwikiLink() )
1274            {
1275                // It's an interwiki link
1276                // InterWiki links also get added to external link chain
1277                // after the links have been resolved.
1278
1279                // FIXME: There is an interesting issue here:  We probably should
1280                //        URLEncode the wikiPage, but we can't since some of the
1281                //        Wikis use slashes (/), which won't survive URLEncoding.
1282                //        Besides, we don't know which character set the other Wiki
1283                //        is using, so you'll have to write the entire name as it appears
1284                //        in the URL.  Bugger.
1285
1286                String extWiki  = link.getExternalWiki();
1287                String wikiPage = link.getExternalWikiPage();
1288
1289                if( m_wysiwygEditorMode )
1290                {
1291                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
1292                }
1293                else
1294                {
1295                    String urlReference = m_engine.getInterWikiURL( extWiki );
1296
1297                    if( urlReference != null )
1298                    {
1299                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
1300                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
1301
1302                        if( m_linkParsingOperations.isImageLink(urlReference) )
1303                        {
1304                            handleImageLink( urlReference, linktext, link.hasReference() );
1305                        }
1306                        else
1307                        {
1308                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
1309                        }
1310
1311                        if( m_linkParsingOperations.isExternalLink(urlReference) )
1312                        {
1313                            addElement( outlinkImage() );
1314                        }
1315                    }
1316                    else
1317                    {
1318                        Object[] args = { extWiki };
1319                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
1320                    }
1321                }
1322            }
1323            else if( linkref.startsWith("#") )
1324            {
1325                // It defines a local footnote
1326                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1327            }
1328            else if( TextUtil.isNumber( linkref ) )
1329            {
1330                // It defines a reference to a local footnote
1331                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1332            }
1333            else
1334            {
1335                int hashMark = -1;
1336
1337                //
1338                //  Internal wiki link, but is it an attachment link?
1339                //
1340                String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref );
1341                if( attachment != null )
1342                {
1343                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1344
1345                    if( m_linkParsingOperations.isImageLink( linkref ) )
1346                    {
1347                        attachment = m_context.getURL( WikiContext.ATTACH, attachment );
1348                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1349                    }
1350                    else
1351                    {
1352                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1353                    }
1354                }
1355                else if( (hashMark = linkref.indexOf('#')) != -1 )
1356                {
1357                    // It's an internal Wiki link, but to a named section
1358
1359                    String namedSection = linkref.substring( hashMark+1 );
1360                    linkref = linkref.substring( 0, hashMark );
1361
1362                    linkref = MarkupParser.cleanLink( linkref );
1363
1364                    callMutatorChain( m_localLinkMutatorChain, linkref );
1365
1366                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1367                    if( matchedLink != null ) {
1368                        String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection));
1369                        sectref = sectref.replace('%', '_');
1370                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1371                    } else {
1372                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1373                    }
1374                }
1375                else
1376                {
1377                    // It's an internal Wiki link
1378                    linkref = MarkupParser.cleanLink( linkref );
1379
1380                    callMutatorChain( m_localLinkMutatorChain, linkref );
1381
1382                    String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1383                    if( matchedLink != null ) {
1384                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1385                    } else {
1386                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1387                    }
1388                }
1389            }
1390        }
1391        catch( ParseException e )
1392        {
1393            log.info("Parser failure: ",e);
1394            Object[] args = { e.getMessage() };
1395            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1396        }
1397
1398        return m_currentElement;
1399    }
1400
1401    /**
1402     *  Pushes back any string that has been read.  It will obviously
1403     *  be pushed back in a reverse order.
1404     *
1405     *  @since 2.1.77
1406     */
1407    private void pushBack( String s )
1408        throws IOException
1409    {
1410        for( int i = s.length()-1; i >= 0; i-- )
1411        {
1412            pushBack( s.charAt(i) );
1413        }
1414    }
1415
1416    private Element handleBackslash()
1417        throws IOException
1418    {
1419        int ch = nextToken();
1420
1421        if( ch == '\\' )
1422        {
1423            int ch2 = nextToken();
1424
1425            if( ch2 == '\\' )
1426            {
1427                pushElement( new Element("br").setAttribute("clear","all"));
1428                return popElement("br");
1429            }
1430
1431            pushBack( ch2 );
1432
1433            pushElement( new Element("br") );
1434            return popElement("br");
1435        }
1436
1437        pushBack( ch );
1438
1439        return null;
1440    }
1441
1442    private Element handleUnderscore()
1443        throws IOException
1444    {
1445        int ch = nextToken();
1446        Element el = null;
1447
1448        if( ch == '_' )
1449        {
1450            if( m_isbold )
1451            {
1452                el = popElement("b");
1453            }
1454            else
1455            {
1456                el = pushElement( new Element("b") );
1457            }
1458            m_isbold = !m_isbold;
1459        }
1460        else
1461        {
1462            pushBack( ch );
1463        }
1464
1465        return el;
1466    }
1467
1468
1469    /**
1470     *  For example: italics.
1471     */
1472    private Element handleApostrophe()
1473        throws IOException
1474    {
1475        int ch = nextToken();
1476        Element el = null;
1477
1478        if( ch == '\'' )
1479        {
1480            if( m_isitalic )
1481            {
1482                el = popElement("i");
1483            }
1484            else
1485            {
1486                el = pushElement( new Element("i") );
1487            }
1488            m_isitalic = !m_isitalic;
1489        }
1490        else
1491        {
1492            pushBack( ch );
1493        }
1494
1495        return el;
1496    }
1497
1498    private Element handleOpenbrace( boolean isBlock )
1499        throws IOException
1500    {
1501        int ch = nextToken();
1502
1503        if( ch == '{' )
1504        {
1505            int ch2 = nextToken();
1506
1507            if( ch2 == '{' )
1508            {
1509                m_isPre = true;
1510                m_isEscaping = true;
1511                m_isPreBlock = isBlock;
1512
1513                if( isBlock )
1514                {
1515                    startBlockLevel();
1516                    return pushElement( new Element("pre") );
1517                }
1518
1519                return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") );
1520            }
1521
1522            pushBack( ch2 );
1523
1524            return pushElement( new Element("tt") );
1525        }
1526
1527        pushBack( ch );
1528
1529        return null;
1530    }
1531
1532    /**
1533     *  Handles both }} and }}}
1534     */
1535    private Element handleClosebrace()
1536        throws IOException
1537    {
1538        int ch2 = nextToken();
1539
1540        if( ch2 == '}' )
1541        {
1542            int ch3 = nextToken();
1543
1544            if( ch3 == '}' )
1545            {
1546                if( m_isPre )
1547                {
1548                    if( m_isPreBlock )
1549                    {
1550                        popElement( "pre" );
1551                    }
1552                    else
1553                    {
1554                        popElement( "span" );
1555                    }
1556
1557                    m_isPre = false;
1558                    m_isEscaping = false;
1559                    return m_currentElement;
1560                }
1561
1562                m_plainTextBuf.append("}}}");
1563                return m_currentElement;
1564            }
1565
1566            pushBack( ch3 );
1567
1568            if( !m_isEscaping )
1569            {
1570                return popElement("tt");
1571            }
1572        }
1573
1574        pushBack( ch2 );
1575
1576        return null;
1577    }
1578
1579    private Element handleDash()
1580        throws IOException
1581    {
1582        int ch = nextToken();
1583
1584        if( ch == '-' )
1585        {
1586            int ch2 = nextToken();
1587
1588            if( ch2 == '-' )
1589            {
1590                int ch3 = nextToken();
1591
1592                if( ch3 == '-' )
1593                {
1594                    // Empty away all the rest of the dashes.
1595                    // Do not forget to return the first non-match back.
1596                    do
1597                    {
1598                        ch = nextToken();
1599                    }
1600                    while ( ch == '-' );
1601
1602                    pushBack(ch);
1603                    startBlockLevel();
1604                    pushElement( new Element("hr") );
1605                    return popElement( "hr" );
1606                }
1607
1608                pushBack( ch3 );
1609            }
1610            pushBack( ch2 );
1611        }
1612
1613        pushBack( ch );
1614
1615        return null;
1616    }
1617
1618    private Element handleHeading()
1619        throws IOException
1620    {
1621        Element el = null;
1622
1623        int ch  = nextToken();
1624
1625        Heading hd = new Heading();
1626
1627        if( ch == '!' )
1628        {
1629            int ch2 = nextToken();
1630
1631            if( ch2 == '!' )
1632            {
1633                String title = peekAheadLine();
1634
1635                el = makeHeading( Heading.HEADING_LARGE, title, hd);
1636            }
1637            else
1638            {
1639                pushBack( ch2 );
1640                String title = peekAheadLine();
1641                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1642            }
1643        }
1644        else
1645        {
1646            pushBack( ch );
1647            String title = peekAheadLine();
1648            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1649        }
1650
1651        callHeadingListenerChain( hd );
1652
1653        m_lastHeading = hd;
1654
1655        if( el != null ) pushElement(el);
1656
1657        return el;
1658    }
1659
1660    /**
1661     *  Reads the stream until the next EOL or EOF.  Note that it will also read the
1662     *  EOL from the stream.
1663     */
1664    private StringBuilder readUntilEOL()
1665        throws IOException
1666    {
1667        int ch;
1668        StringBuilder buf = new StringBuilder( 256 );
1669
1670        while( true )
1671        {
1672            ch = nextToken();
1673
1674            if( ch == -1 )
1675                break;
1676
1677            buf.append( (char) ch );
1678
1679            if( ch == '\n' )
1680                break;
1681        }
1682        return buf;
1683    }
1684
1685    /** Controls whether italic is restarted after a paragraph shift */
1686
1687    private boolean m_restartitalic = false;
1688    private boolean m_restartbold   = false;
1689
1690    private boolean m_newLine;
1691
1692    /**
1693     *  Starts a block level element, therefore closing
1694     *  a potential open paragraph tag.
1695     */
1696    private void startBlockLevel()
1697    {
1698        // These may not continue over block level limits in XHTML
1699
1700        popElement("i");
1701        popElement("b");
1702        popElement("tt");
1703
1704        if( m_isOpenParagraph )
1705        {
1706            m_isOpenParagraph = false;
1707            popElement("p");
1708            m_plainTextBuf.append("\n"); // Just small beautification
1709        }
1710
1711        m_restartitalic = m_isitalic;
1712        m_restartbold   = m_isbold;
1713
1714        m_isitalic = false;
1715        m_isbold   = false;
1716    }
1717
1718    private static String getListType( char c )
1719    {
1720        if( c == '*' )
1721        {
1722            return "ul";
1723        }
1724        else if( c == '#' )
1725        {
1726            return "ol";
1727        }
1728        throw new InternalWikiException("Parser got faulty list type: "+c);
1729    }
1730    /**
1731     *  Like original handleOrderedList() and handleUnorderedList()
1732     *  however handles both ordered ('#') and unordered ('*') mixed together.
1733     */
1734
1735    // FIXME: Refactor this; it's a bit messy.
1736
1737    private Element handleGeneralList()
1738        throws IOException
1739    {
1740         startBlockLevel();
1741
1742         String strBullets = readWhile( "*#" );
1743         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1744         int numBullets = strBullets.length();
1745
1746         // override the beginning portion of bullet pattern to be like the previous
1747         // to simulate PHPWiki style lists
1748
1749         if(m_allowPHPWikiStyleLists)
1750         {
1751             // only substitute if different
1752             if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1753                   (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
1754             {
1755                 if(numBullets <= m_genlistlevel)
1756                 {
1757                     // Substitute all but the last character (keep the expressed bullet preference)
1758                     strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
1759                                   + strBullets.substring(numBullets-1, numBullets);
1760                 }
1761                 else
1762                 {
1763                     strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
1764                 }
1765             }
1766         }
1767
1768         //
1769         //  Check if this is still of the same type
1770         //
1771         if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
1772            (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
1773         {
1774             if( numBullets > m_genlistlevel )
1775             {
1776                 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );
1777
1778                 for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
1779                 {
1780                     // bullets are growing, get from new bullet list
1781                     pushElement( new Element("li") );
1782                     pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
1783                 }
1784             }
1785             else if( numBullets < m_genlistlevel )
1786             {
1787                 //  Close the previous list item.
1788                 // buf.append( m_renderer.closeListItem() );
1789                 popElement( "li" );
1790
1791                 for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
1792                 {
1793                     // bullets are shrinking, get from old bullet list
1794
1795                     popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1796                     if( m_genlistlevel > 0 )
1797                     {
1798                         popElement( "li" );
1799                     }
1800
1801                 }
1802             }
1803             else
1804             {
1805                 if( m_genlistlevel > 0 )
1806                 {
1807                     popElement( "li" );
1808                 }
1809             }
1810         }
1811         else
1812         {
1813             //
1814             //  The pattern has changed, unwind and restart
1815             //
1816             int  numEqualBullets;
1817             int  numCheckBullets;
1818
1819             // find out how much is the same
1820             numEqualBullets = 0;
1821             numCheckBullets = Math.min(numBullets,m_genlistlevel);
1822
1823             while( numEqualBullets < numCheckBullets )
1824             {
1825                 // if the bullets are equal so far, keep going
1826                 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
1827                     numEqualBullets++;
1828                 // otherwise giveup, we have found how many are equal
1829                 else
1830                     break;
1831             }
1832
1833             //unwind
1834             for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
1835             {
1836                 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
1837                 if( m_genlistlevel > numBullets )
1838                 {
1839                     popElement("li");
1840                 }
1841             }
1842
1843             //rewind
1844
1845             pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
1846             for(int i = numEqualBullets; i < numBullets; i++)
1847             {
1848                 pushElement( new Element("li") );
1849                 pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
1850             }
1851             m_genlistlevel = numBullets;
1852         }
1853
1854         //
1855         //  Push a new list item, and eat away any extra whitespace
1856         //
1857         pushElement( new Element("li") );
1858         readWhile(" ");
1859
1860         // work done, remember the new bullet list (in place of old one)
1861         m_genlistBulletBuffer.setLength(0);
1862         m_genlistBulletBuffer.append(strBullets);
1863
1864         return m_currentElement;
1865    }
1866
1867    private Element unwindGeneralList()
1868    {
1869        //unwind
1870        for( ; m_genlistlevel > 0; m_genlistlevel-- )
1871        {
1872            popElement( "li" );
1873            popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
1874        }
1875
1876        m_genlistBulletBuffer.setLength(0);
1877
1878        return null;
1879    }
1880
1881
1882    private Element handleDefinitionList()
1883        throws IOException
1884    {
1885        if( !m_isdefinition )
1886        {
1887            m_isdefinition = true;
1888
1889            startBlockLevel();
1890
1891            pushElement( new Element("dl") );
1892            return pushElement( new Element("dt") );
1893        }
1894
1895        return null;
1896    }
1897
1898    private Element handleOpenbracket()
1899        throws IOException
1900    {
1901        StringBuilder sb = new StringBuilder(40);
1902        int pos = getPosition();
1903        int ch = nextToken();
1904        boolean isPlugin = false;
1905
1906        if( ch == '[' )
1907        {
1908            if( m_wysiwygEditorMode )
1909            {
1910                sb.append( '[' );
1911            }
1912
1913            sb.append( (char)ch );
1914
1915            while( (ch = nextToken()) == '[' )
1916            {
1917                sb.append( (char)ch );
1918            }
1919        }
1920
1921
1922        if( ch == '{' )
1923        {
1924            isPlugin = true;
1925        }
1926
1927        pushBack( ch );
1928
1929        if( sb.length() > 0 )
1930        {
1931            m_plainTextBuf.append( sb );
1932            return m_currentElement;
1933        }
1934
1935        //
1936        //  Find end of hyperlink
1937        //
1938
1939        ch = nextToken();
1940        int nesting = 1;    // Check for nested plugins
1941
1942        while( ch != -1 )
1943        {
1944            int ch2 = nextToken(); pushBack(ch2);
1945
1946            if( isPlugin )
1947            {
1948                if( ch == '[' && ch2 == '{' )
1949                {
1950                    nesting++;
1951                }
1952                else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' )
1953                {
1954                    break;
1955                }
1956                else if( ch == '}' && ch2 == ']' )
1957                {
1958                    // NB: This will be decremented once at the end
1959                    nesting--;
1960                }
1961            }
1962            else
1963            {
1964                if( ch == ']' )
1965                {
1966                    break;
1967                }
1968            }
1969
1970            sb.append( (char) ch );
1971
1972            ch = nextToken();
1973        }
1974
1975        //
1976        //  If the link is never finished, do some tricks to display the rest of the line
1977        //  unchanged.
1978        //
1979        if( ch == -1 )
1980        {
1981            log.debug("Warning: unterminated link detected!");
1982            m_isEscaping = true;
1983            m_plainTextBuf.append( sb );
1984            flushPlainText();
1985            m_isEscaping = false;
1986            return m_currentElement;
1987        }
1988
1989        return handleHyperlinks( sb.toString(), pos );
1990    }
1991
1992    /**
1993     *  Reads the stream until the current brace is closed or stream end.
1994     */
1995    private String readBraceContent( char opening, char closing )
1996        throws IOException
1997    {
1998        StringBuilder sb = new StringBuilder(40);
1999        int braceLevel = 1;
2000        int ch;
2001        while(( ch = nextToken() ) != -1 )
2002        {
2003            if( ch == '\\' )
2004            {
2005                continue;
2006            }
2007            else if ( ch == opening )
2008            {
2009                braceLevel++;
2010            }
2011            else if ( ch == closing )
2012            {
2013                braceLevel--;
2014                if (braceLevel==0)
2015                {
2016                  break;
2017                }
2018            }
2019            sb.append( (char)ch );
2020        }
2021        return sb.toString();
2022    }
2023
2024
2025    /**
2026     *  Handles constructs of type %%(style) and %%class
2027     * @param newLine
2028     * @return An Element containing the div or span, depending on the situation.
2029     * @throws IOException
2030     */
2031    private Element handleDiv( boolean newLine )
2032        throws IOException
2033    {
2034        int ch = nextToken();
2035        Element el = null;
2036
2037        if( ch == '%' )
2038        {
2039            String style = null;
2040            String clazz = null;
2041
2042            ch = nextToken();
2043
2044            //
2045            //  Style or class?
2046            //
2047            if( ch == '(' )
2048            {
2049                style = readBraceContent('(',')');
2050            }
2051            else if( Character.isLetter( (char) ch ) )
2052            {
2053                pushBack( ch );
2054                clazz = readUntil( " \t\n\r" );
2055                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
2056                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
2057                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
2058                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
2059
2060                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
2061                //(2) remove any invalid character
2062                if( clazz != null){
2063
2064                    clazz = clazz.replace('.', ' ')
2065                                 .replaceAll("[^\\s-_\\w\\x200-\\x377]+","");
2066
2067                }
2068                ch = nextToken();
2069
2070                //
2071                //  Pop out only spaces, so that the upcoming EOL check does not check the
2072                //  next line.
2073                //
2074                if( ch == '\n' || ch == '\r' )
2075                {
2076                    pushBack(ch);
2077                }
2078            }
2079            else
2080            {
2081                //
2082                // Anything else stops.
2083                //
2084
2085                pushBack(ch);
2086
2087                try
2088                {
2089                    Boolean isSpan = m_styleStack.pop();
2090
2091                    if( isSpan == null )
2092                    {
2093                        // Fail quietly
2094                    }
2095                    else if( isSpan.booleanValue() )
2096                    {
2097                        el = popElement( "span" );
2098                    }
2099                    else
2100                    {
2101                        el = popElement( "div" );
2102                    }
2103                }
2104                catch( EmptyStackException e )
2105                {
2106                    log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened.");
2107                    return m_currentElement;
2108                }
2109
2110                return el;
2111            }
2112
2113            //
2114            //  Check if there is an attempt to do something nasty
2115            //
2116
2117            try
2118            {
2119                style = StringEscapeUtils.unescapeHtml(style);
2120                if( style != null && style.indexOf("javascript:") != -1 )
2121                {
2122                    log.debug("Attempt to output javascript within CSS:"+style);
2123                    ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2124                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
2125                }
2126            }
2127            catch( NumberFormatException e )
2128            {
2129                //
2130                //  If there are unknown entities, we don't want the parser to stop.
2131                //
2132                ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
2133                String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
2134                return addElement( makeError( msg ) );
2135            }
2136
2137            //
2138            //  Decide if we should open a div or a span?
2139            //
2140            String eol = peekAheadLine();
2141
2142            if( eol.trim().length() > 0 )
2143            {
2144                // There is stuff after the class
2145
2146                el = new Element("span");
2147
2148                m_styleStack.push( Boolean.TRUE );
2149            }
2150            else
2151            {
2152                startBlockLevel();
2153                el = new Element("div");
2154                m_styleStack.push( Boolean.FALSE );
2155            }
2156
2157            if( style != null ) el.setAttribute("style", style);
2158            if( clazz != null ) el.setAttribute("class", clazz);
2159            el = pushElement( el );
2160
2161            return el;
2162        }
2163
2164        pushBack(ch);
2165
2166        return el;
2167    }
2168
2169    private Element handleSlash( boolean newLine )
2170        throws IOException
2171    {
2172        int ch = nextToken();
2173
2174        pushBack(ch);
2175        if( ch == '%' && !m_styleStack.isEmpty() )
2176        {
2177            return handleDiv( newLine );
2178        }
2179
2180        return null;
2181    }
2182
2183    private Element handleBar( boolean newLine )
2184        throws IOException
2185    {
2186        Element el = null;
2187
2188        if( !m_istable && !newLine )
2189        {
2190            return null;
2191        }
2192
2193        //
2194        //  If the bar is in the first column, we will either start
2195        //  a new table or continue the old one.
2196        //
2197
2198        if( newLine )
2199        {
2200            if( !m_istable )
2201            {
2202                startBlockLevel();
2203                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
2204                m_istable = true;
2205                m_rowNum = 0;
2206            }
2207
2208            m_rowNum++;
2209            Element tr = ( m_rowNum % 2 != 0 )
2210                       ? new Element("tr").setAttribute("class", "odd")
2211                       : new Element("tr");
2212            el = pushElement( tr );
2213        }
2214
2215        //
2216        //  Check out which table cell element to start;
2217        //  a header element (th) or a regular element (td).
2218        //
2219        int ch = nextToken();
2220
2221        if( ch == '|' )
2222        {
2223            if( !newLine )
2224            {
2225                el = popElement("th");
2226                if( el == null ) popElement("td");
2227            }
2228            el = pushElement( new Element("th") );
2229        }
2230        else
2231        {
2232            if( !newLine )
2233            {
2234                el = popElement("td");
2235                if( el == null ) popElement("th");
2236            }
2237
2238            el = pushElement( new Element("td") );
2239
2240            pushBack( ch );
2241        }
2242
2243        return el;
2244    }
2245
2246    /**
2247     *  Generic escape of next character or entity.
2248     */
2249    private Element handleTilde()
2250        throws IOException
2251    {
2252        int ch = nextToken();
2253
2254        if( ch == ' ' )
2255        {
2256            if( m_wysiwygEditorMode )
2257            {
2258                m_plainTextBuf.append( "~ " );
2259            }
2260            return m_currentElement;
2261        }
2262
2263        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
2264            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
2265            ch == '{' || ch == ']' || ch == '}' || ch == '%' )
2266        {
2267            if( m_wysiwygEditorMode )
2268            {
2269                m_plainTextBuf.append( '~' );
2270            }
2271
2272            m_plainTextBuf.append( (char)ch );
2273            m_plainTextBuf.append(readWhile( ""+(char)ch ));
2274            return m_currentElement;
2275        }
2276
2277        // No escape.
2278        pushBack( ch );
2279
2280        return null;
2281    }
2282
2283    private void fillBuffer( Element startElement )
2284        throws IOException
2285    {
2286        m_currentElement = startElement;
2287
2288        boolean quitReading = false;
2289        m_newLine = true;
2290        disableOutputEscaping();
2291
2292        while(!quitReading)
2293        {
2294            int ch = nextToken();
2295
2296            if( ch == -1 ) break;
2297
2298            //
2299            //  Check if we're actually ending the preformatted mode.
2300            //  We still must do an entity transformation here.
2301            //
2302            if( m_isEscaping )
2303            {
2304                if( ch == '}' )
2305                {
2306                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
2307                }
2308                else if( ch == -1 )
2309                {
2310                    quitReading = true;
2311                }
2312                else if( ch == '\r' )
2313                {
2314                    // DOS line feeds we ignore.
2315                }
2316                else if( ch == '<' )
2317                {
2318                    m_plainTextBuf.append( "&lt;" );
2319                }
2320                else if( ch == '>' )
2321                {
2322                    m_plainTextBuf.append( "&gt;" );
2323                }
2324                else if( ch == '&' )
2325                {
2326                    m_plainTextBuf.append( "&amp;" );
2327                }
2328                else if( ch == '~' )
2329                {
2330                    String braces = readWhile("}");
2331                    if( braces.length() >= 3 )
2332                    {
2333                        m_plainTextBuf.append("}}}");
2334
2335                        braces = braces.substring(3);
2336                    }
2337                    else
2338                    {
2339                        m_plainTextBuf.append( (char) ch );
2340                    }
2341
2342                    for( int i = braces.length()-1; i >= 0; i-- )
2343                    {
2344                        pushBack(braces.charAt(i));
2345                    }
2346                }
2347                else
2348                {
2349                    m_plainTextBuf.append( (char) ch );
2350                }
2351
2352                continue;
2353            }
2354
2355            //
2356            //  An empty line stops a list
2357            //
2358            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 )
2359            {
2360                m_plainTextBuf.append(unwindGeneralList());
2361            }
2362
2363            if( m_newLine && ch != '|' && m_istable )
2364            {
2365                popElement("table");
2366                m_istable = false;
2367            }
2368
2369            int skip = IGNORE;
2370
2371            //
2372            //  Do the actual parsing and catch any errors.
2373            //
2374            try
2375            {
2376                skip = parseToken( ch );
2377            }
2378            catch( IllegalDataException e )
2379            {
2380                log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage());
2381
2382                makeError("Error: "+cleanupSuspectData(e.getMessage()) );
2383            }
2384
2385            //
2386            //   The idea is as follows:  If the handler method returns
2387            //   an element (el != null), it is assumed that it has been
2388            //   added in the stack.  Otherwise the character is added
2389            //   as is to the plaintext buffer.
2390            //
2391            //   For the transition phase, if s != null, it also gets
2392            //   added in the plaintext buffer.
2393            //
2394
2395            switch( skip )
2396            {
2397                case ELEMENT:
2398                    m_newLine = false;
2399                    break;
2400
2401                case CHARACTER:
2402                    m_plainTextBuf.append( (char) ch );
2403                    m_newLine = false;
2404                    break;
2405
2406                case IGNORE:
2407                default:
2408                    break;
2409            }
2410        }
2411
2412        closeHeadings();
2413        popElement("domroot");
2414    }
2415
2416    private String cleanupSuspectData( String s )
2417    {
2418        StringBuilder sb = new StringBuilder( s.length() );
2419
2420        for( int i = 0; i < s.length(); i++ )
2421        {
2422            char c = s.charAt(i);
2423
2424            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
2425            else sb.append( "0x"+Integer.toString(c,16).toUpperCase() );
2426        }
2427
2428        return sb.toString();
2429    }
2430
2431    /** The token is a plain character. */
2432    protected static final int CHARACTER = 0;
2433
2434    /** The token is a wikimarkup element. */
2435    protected static final int ELEMENT   = 1;
2436
2437    /** The token is to be ignored. */
2438    protected static final int IGNORE    = 2;
2439
2440    /**
2441     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
2442     *  you think this was a wiki markup element, and IGNORE, if you think
2443     *  we should ignore this altogether.
2444     *  <p>
2445     *  To add your own MarkupParser, you can override this method, but it
2446     *  is recommended that you call super.parseToken() as well to gain advantage
2447     *  of JSPWiki's own markup.  You can call it at the start of your own
2448     *  parseToken() or end - it does not matter.
2449     *
2450     * @param ch The character under investigation
2451     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2452     * @throws IOException If parsing fails.
2453     */
2454    protected int parseToken( int ch )
2455        throws IOException
2456    {
2457        Element el = null;
2458
2459        //
2460        //  Now, check the incoming token.
2461        //
2462        switch( ch )
2463        {
2464          case '\r':
2465            // DOS linefeeds we forget
2466            return IGNORE;
2467
2468          case '\n':
2469            //
2470            //  Close things like headings, etc.
2471            //
2472
2473            // FIXME: This is not really very fast
2474
2475            closeHeadings();
2476
2477            popElement("dl"); // Close definition lists.
2478            if( m_istable )
2479            {
2480                popElement("tr");
2481            }
2482
2483            m_isdefinition = false;
2484
2485            if( m_newLine )
2486            {
2487                // Paragraph change.
2488                startBlockLevel();
2489
2490                //
2491                //  Figure out which elements cannot be enclosed inside
2492                //  a <p></p> pair according to XHTML rules.
2493                //
2494                String nextLine = peekAheadLine();
2495                if( nextLine.length() == 0 ||
2496                    (nextLine.length() > 0 &&
2497                     !nextLine.startsWith("{{{") &&
2498                     !nextLine.startsWith("----") &&
2499                     !nextLine.startsWith("%%") &&
2500                     "*#!;".indexOf( nextLine.charAt(0) ) == -1) )
2501                {
2502                    pushElement( new Element("p") );
2503                    m_isOpenParagraph = true;
2504
2505                    if( m_restartitalic )
2506                    {
2507                        pushElement( new Element("i") );
2508                        m_isitalic = true;
2509                        m_restartitalic = false;
2510                    }
2511                    if( m_restartbold )
2512                    {
2513                        pushElement( new Element("b") );
2514                        m_isbold = true;
2515                        m_restartbold = false;
2516                    }
2517                }
2518            }
2519            else
2520            {
2521                m_plainTextBuf.append("\n");
2522                m_newLine = true;
2523            }
2524            return IGNORE;
2525
2526
2527          case '\\':
2528            el = handleBackslash();
2529            break;
2530
2531          case '_':
2532            el = handleUnderscore();
2533            break;
2534
2535          case '\'':
2536            el = handleApostrophe();
2537            break;
2538
2539          case '{':
2540            el = handleOpenbrace( m_newLine );
2541            break;
2542
2543          case '}':
2544            el = handleClosebrace();
2545            break;
2546
2547          case '-':
2548            if( m_newLine )
2549                el = handleDash();
2550
2551            break;
2552
2553          case '!':
2554            if( m_newLine )
2555            {
2556                el = handleHeading();
2557            }
2558            break;
2559
2560          case ';':
2561            if( m_newLine )
2562            {
2563                el = handleDefinitionList();
2564            }
2565            break;
2566
2567          case ':':
2568            if( m_isdefinition )
2569            {
2570                popElement("dt");
2571                el = pushElement( new Element("dd") );
2572                m_isdefinition = false;
2573            }
2574            break;
2575
2576          case '[':
2577            el = handleOpenbracket();
2578            break;
2579
2580          case '*':
2581            if( m_newLine )
2582            {
2583                pushBack('*');
2584                el = handleGeneralList();
2585            }
2586            break;
2587
2588          case '#':
2589            if( m_newLine )
2590            {
2591                pushBack('#');
2592                el = handleGeneralList();
2593            }
2594            break;
2595
2596          case '|':
2597            el = handleBar( m_newLine );
2598            break;
2599
2600          case '~':
2601            el = handleTilde();
2602            break;
2603
2604          case '%':
2605            el = handleDiv( m_newLine );
2606            break;
2607
2608          case '/':
2609            el = handleSlash( m_newLine );
2610            break;
2611
2612          default:
2613            break;
2614        }
2615
2616        return el != null ? ELEMENT : CHARACTER;
2617    }
2618
2619    private void closeHeadings()
2620    {
2621        if( m_lastHeading != null && !m_wysiwygEditorMode )
2622        {
2623            // Add the hash anchor element at the end of the heading
2624            addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) );
2625            m_lastHeading = null;
2626        }
2627        popElement("h2");
2628        popElement("h3");
2629        popElement("h4");
2630    }
2631
2632    /**
2633     *  Parses the entire document from the Reader given in the constructor or
2634     *  set by {@link #setInputReader(Reader)}.
2635     *
2636     *  @return A WikiDocument, ready to be passed to the renderer.
2637     *  @throws IOException If parsing cannot be accomplished.
2638     */
2639    @Override
2640    public WikiDocument parse()
2641        throws IOException
2642    {
2643        WikiDocument d = new WikiDocument( m_context.getPage() );
2644        d.setContext( m_context );
2645
2646        Element rootElement = new Element("domroot");
2647
2648        d.setRootElement( rootElement );
2649
2650        fillBuffer( rootElement );
2651
2652        paragraphify(rootElement);
2653
2654        return d;
2655    }
2656
2657    /**
2658     *  Checks out that the first paragraph is correctly installed.
2659     *
2660     *  @param rootElement
2661     */
2662    private void paragraphify(Element rootElement)
2663    {
2664        //
2665        //  Add the paragraph tag to the first paragraph
2666        //
2667        List< Content > kids = rootElement.getContent();
2668
2669        if( rootElement.getChild("p") != null )
2670        {
2671            ArrayList<Content> ls = new ArrayList<>();
2672            int idxOfFirstContent = 0;
2673            int count = 0;
2674
2675            for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ )
2676            {
2677                Content c = i.next();
2678                if( c instanceof Element )
2679                {
2680                    String name = ( ( Element )c ).getName();
2681                    if( isBlockLevel( name ) ) break;
2682                }
2683
2684                if( !(c instanceof ProcessingInstruction) )
2685                {
2686                    ls.add( c );
2687                    if( idxOfFirstContent == 0 ) idxOfFirstContent = count;
2688                }
2689            }
2690
2691            //
2692            //  If there were any elements, then add a new <p> (unless it would
2693            //  be an empty one)
2694            //
2695            if( ls.size() > 0 )
2696            {
2697                Element newel = new Element("p");
2698
2699                for( Iterator< Content > i = ls.iterator(); i.hasNext(); )
2700                {
2701                    Content c = i.next();
2702
2703                    c.detach();
2704                    newel.addContent(c);
2705                }
2706
2707                //
2708                // Make sure there are no empty <p/> tags added.
2709                //
2710                if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() )
2711                    rootElement.addContent(idxOfFirstContent, newel);
2712            }
2713        }
2714    }
2715
2716}