001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import org.apache.commons.lang3.StringUtils;
022import org.apache.commons.text.StringEscapeUtils;
023import org.apache.logging.log4j.LogManager;
024import org.apache.logging.log4j.Logger;
025import org.apache.oro.text.regex.MalformedPatternException;
026import org.apache.oro.text.regex.MatchResult;
027import org.apache.oro.text.regex.Pattern;
028import org.apache.oro.text.regex.PatternCompiler;
029import org.apache.oro.text.regex.PatternMatcher;
030import org.apache.oro.text.regex.Perl5Compiler;
031import org.apache.oro.text.regex.Perl5Matcher;
032import org.apache.wiki.InternalWikiException;
033import org.apache.wiki.StringTransmutator;
034import org.apache.wiki.api.core.Acl;
035import org.apache.wiki.api.core.Context;
036import org.apache.wiki.api.core.ContextEnum;
037import org.apache.wiki.api.core.Page;
038import org.apache.wiki.api.exceptions.PluginException;
039import org.apache.wiki.api.plugin.Plugin;
040import org.apache.wiki.api.spi.Wiki;
041import org.apache.wiki.attachment.AttachmentManager;
042import org.apache.wiki.auth.AuthorizationManager;
043import org.apache.wiki.auth.UserManager;
044import org.apache.wiki.auth.WikiSecurityException;
045import org.apache.wiki.auth.acl.AclManager;
046import org.apache.wiki.i18n.InternationalizationManager;
047import org.apache.wiki.preferences.Preferences;
048import org.apache.wiki.util.TextUtil;
049import org.apache.wiki.util.XmlUtil;
050import org.apache.wiki.variables.VariableManager;
051import org.jdom2.Attribute;
052import org.jdom2.Content;
053import org.jdom2.Element;
054import org.jdom2.IllegalDataException;
055import org.jdom2.ProcessingInstruction;
056import org.jdom2.Verifier;
057
058import javax.xml.transform.Result;
059import java.io.IOException;
060import java.io.Reader;
061import java.io.StringReader;
062import java.text.MessageFormat;
063import java.util.ArrayList;
064import java.util.Arrays;
065import java.util.Collection;
066import java.util.EmptyStackException;
067import java.util.HashMap;
068import java.util.Iterator;
069import java.util.List;
070import java.util.Map;
071import java.util.Properties;
072import java.util.ResourceBundle;
073import java.util.Stack;
074
075/**
076 * Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the heart and soul of JSPWiki : make
077 * sure you test properly anything that is added, or else it breaks down horribly.
078 *
079 *  @since  2.4
080 */
081public class JSPWikiMarkupParser extends MarkupParser {
082
083    protected static final int              READ          = 0;
084    protected static final int              EDIT          = 1;
085    protected static final int              EMPTY         = 2;  // Empty message
086    protected static final int              LOCAL         = 3;
087    protected static final int              LOCALREF      = 4;
088    protected static final int              IMAGE         = 5;
089    protected static final int              EXTERNAL      = 6;
090    protected static final int              INTERWIKI     = 7;
091    protected static final int              IMAGELINK     = 8;
092    protected static final int              IMAGEWIKILINK = 9;
093    protected static final int              ATTACHMENT    = 10;
094
095    private static final Logger LOG = LogManager.getLogger( JSPWikiMarkupParser.class );
096
097    private boolean        m_isbold;
098    private boolean        m_isitalic;
099    private boolean        m_istable;
100    private boolean        m_isPre;
101    private boolean        m_isEscaping;
102    private boolean        m_isdefinition;
103    private boolean        m_isPreBlock;
104
105    /** Contains style information, in multiple forms. */
106    private final Stack< Boolean > m_styleStack = new Stack<>();
107
108     // general list handling
109    private int m_genlistlevel;
110    private final StringBuilder m_genlistBulletBuffer = new StringBuilder( 10 );  // stores the # and * pattern
111    private final boolean m_allowPHPWikiStyleLists = true;
112
113    private boolean m_isOpenParagraph;
114
115    /** Parser for extended link functionality. */
116    private final LinkParser m_linkParser = new LinkParser();
117
118    /** Keeps track of any plain text that gets put in the Text nodes */
119    private StringBuilder m_plainTextBuf = new StringBuilder( 20 );
120
121    private Element m_currentElement;
122
123    /** Keep track of duplicate header names.  */
124    private final Map< String, Integer > m_titleSectionCounter = new HashMap<>();
125
126    /** If true, then considers CamelCase links as well. */
127    private boolean m_camelCaseLinks;
128
129    /** If true, then generate special output for wysiwyg editing in certain cases */
130    private boolean m_wysiwygEditorMode;
131
132    /** If true, consider URIs that have no brackets as well. */
133    // FIXME: Currently reserved, but not used.
134    private boolean m_plainUris;
135
136    /** If true, all outward links use a small link image. */
137    private boolean m_useOutlinkImage = true;
138
139    private boolean m_useAttachmentImage = true;
140
141    /** If true, allows raw HTML. */
142    private boolean m_allowHTML;
143
144    private boolean m_useRelNofollow;
145
146    private final PatternCompiler m_compiler = new Perl5Compiler();
147
148    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";
149
150    private final PatternMatcher m_camelCaseMatcher = new Perl5Matcher();
151    private Pattern m_camelCasePattern;
152
153    private int m_rowNum = 1;
154
155    private Heading m_lastHeading;
156
157    private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";
158
159    /**
160     *  Creates a markup parser.
161     *
162     *  @param context The WikiContext which controls the parsing
163     *  @param in Where the data is read from.
164     */
165    public JSPWikiMarkupParser( final Context context, final Reader in ) {
166        super( context, in );
167        initialize();
168    }
169
170    // FIXME: parsers should be pooled for better performance.
171    private void initialize() {
172        initInlineImagePatterns();
173
174        m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN );
175        if( m_camelCasePattern == null ) {
176            try {
177                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
178            } catch( final MalformedPatternException e ) {
179                LOG.fatal("Internal error: Someone put in a faulty pattern.",e);
180                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
181            }
182            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
183        }
184
185        //  Set the properties.
186        final Properties props = m_engine.getWikiProperties();
187        final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
188
189        if( cclinks != null ) {
190            m_camelCaseLinks = TextUtil.isPositive( cclinks );
191        } else {
192            m_camelCaseLinks  = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks );
193        }
194
195        final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE );
196        if( wysiwygVariable != null ) {
197            m_wysiwygEditorMode = wysiwygVariable;
198        }
199
200        m_plainUris          = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
201        m_useOutlinkImage    = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
202        m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
203        m_allowHTML          = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
204        m_useRelNofollow     = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
205
206        if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) {
207            disableAccessRules();
208        }
209
210        m_context.getPage().setHasMetadata();
211    }
212
213    /**
214     *  Calls a transmutator chain.
215     *
216     *  @param list Chain to call
217     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
218     *  @return The result of the mutation.
219     */
220    protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) {
221        if( list == null || list.size() == 0 ) {
222            return text;
223        }
224
225        for( final StringTransmutator m : list ) {
226            text = m.mutate( m_context, text );
227        }
228
229        return text;
230    }
231
232    /**
233     * Calls the heading listeners.
234     *
235     * @param param A Heading object.
236     */
237    private void callHeadingListenerChain( final Heading param ) {
238        for( final HeadingListener h : m_headingListenerChain ) {
239            h.headingAdded( m_context, param );
240        }
241    }
242
243    /**
244     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
245     *  if you really know what you are doing.
246     *
247     *  @param type One of the types above
248     *  @param link URL to which to link to
249     *  @param text Link text
250     *  @param section If a particular section identifier is required.
251     *  @return An A element.
252     *  @since 2.4.78
253     */
254    private Element createAnchor( final int type, final String link, String text, String section ) {
255        text = escapeHTMLEntities( text );
256        section = escapeHTMLEntities( section );
257        final Element el = new Element( "a" );
258        el.setAttribute( "class", CLASS_TYPES[ type ] );
259        el.setAttribute( "href", link + section );
260        el.addContent( text );
261        return el;
262    }
263
264    private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) {
265        Element el = null;
266        if( text == null ) {
267            text = link;
268        }
269        text = callMutatorChain( m_linkMutators, text );
270        section = (section != null) ? ("#"+section) : "";
271
272        // Make sure we make a link name that can be accepted  as a valid URL.
273        if( link.isEmpty() ) {
274            type = EMPTY;
275        }
276        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
277
278        switch( type ) {
279            case READ:
280                el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section );
281                break;
282
283            case EDIT:
284                el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" );
285                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
286                break;
287
288            case EMPTY:
289                el = new Element("u").addContent(text);
290                break;
291
292            // These two are for local references - footnotes and references to footnotes.
293            // We embed the page name (or whatever WikiContext gives us) to make sure the links are unique across Wiki.
294            case LOCALREF:
295                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
296                break;
297
298            case LOCAL:
299                el = new Element( "a" ).setAttribute( "class", CLASS_FOOTNOTE );
300                el.setAttribute( "name", "ref-" + m_context.getName() + "-" + link.substring( 1 ) );
301                el.addContent( "[" + text + "]" );
302                break;
303
304                //  With the image, external and interwiki types we need to make sure nobody can put in Javascript or
305                //  something else annoying into the links themselves.  We do this by preventing a haxor from stopping
306                //  the link name short with quotes in fillBuffer().
307            case IMAGE:
308                el = new Element( "img" ).setAttribute( "class", "inline" );
309                el.setAttribute( "src", link );
310                el.setAttribute( "alt", text );
311                break;
312
313            case IMAGELINK:
314                el = new Element( "img" ).setAttribute( "class", "inline" );
315                el.setAttribute( "src", link );
316                el.setAttribute( "alt", text );
317                el = createAnchor( IMAGELINK, text, "", "" ).addContent( el );
318                break;
319
320            case IMAGEWIKILINK:
321                final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text );
322                el = new Element( "img" ).setAttribute( "class", "inline" );
323                el.setAttribute( "src", link );
324                el.setAttribute( "alt", text );
325                el = createAnchor( IMAGEWIKILINK, pagelink, "", "" ).addContent( el );
326                break;
327
328            case EXTERNAL:
329                el = createAnchor( EXTERNAL, link, text, section );
330                if( m_useRelNofollow ) {
331                    el.setAttribute( "rel", "nofollow" );
332                }
333                break;
334
335            case INTERWIKI:
336                el = createAnchor( INTERWIKI, link, text, section );
337                break;
338
339            case ATTACHMENT:
340                final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link );
341                final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link );
342                final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" );
343                el = createAnchor( ATTACHMENT, attlink, text, "" );
344                if(  m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) {
345                    el.setAttribute("download", "");
346                }
347
348                pushElement( el );
349                popElement( el.getName() );
350
351                if( m_useAttachmentImage ) {
352                    el = new Element( "img" ).setAttribute( "src", imglink );
353                    el.setAttribute( "border", "0" );
354                    el.setAttribute( "alt", "(info)" );
355
356                    el = new Element( "a" ).setAttribute( "href", infolink ).addContent( el );
357                    el.setAttribute( "class", "infolink" );
358                } else {
359                    el = null;
360                }
361                break;
362
363            default:
364                break;
365        }
366
367        if( el != null && attributes != null ) {
368            while( attributes.hasNext() ) {
369                final Attribute attr = attributes.next();
370                if( attr != null ) {
371                    el.setAttribute( attr );
372                }
373            }
374        }
375
376        if( el != null ) {
377            flushPlainText();
378            m_currentElement.addContent( el );
379        }
380        return el;
381    }
382
383    /**
384     *  These are all the HTML 4.01 block-level elements.
385     */
386    private static final String[] BLOCK_ELEMENTS = {
387        "address", "blockquote", "div", "dl", "fieldset", "form",
388        "h1", "h2", "h3", "h4", "h5", "h6",
389        "hr", "noscript", "ol", "p", "pre", "table", "ul"
390    };
391
392    private static boolean isBlockLevel( final String name ) {
393        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
394    }
395
396    /**
397     *  This method peeks ahead in the stream until EOL and returns the result. It will keep the buffers untouched.
398     *
399     *  @return The string from the current position to the end of line.
400     */
401    // FIXME: Always returns an empty line, even if the stream is full.
402    private String peekAheadLine() throws IOException {
403        final String s = readUntilEOL().toString();
404        if( s.length() > PUSHBACK_BUFFER_SIZE ) {
405            LOG.warn( "Line is longer than maximum allowed size (" + PUSHBACK_BUFFER_SIZE + " characters.  Attempting to recover..." );
406            pushBack( s.substring( 0, PUSHBACK_BUFFER_SIZE - 1 ) );
407        } else {
408            try {
409                pushBack( s );
410            } catch( final IOException e ) {
411                LOG.warn( "Pushback failed: the line is probably too long.  Attempting to recover." );
412            }
413        }
414        return s;
415    }
416
417    private int flushPlainText() {
418        final int numChars = m_plainTextBuf.length();
419        if( numChars > 0 ) {
420            String buf;
421
422            if( !m_allowHTML ) {
423                buf = escapeHTMLEntities(m_plainTextBuf.toString());
424            } else {
425                buf = m_plainTextBuf.toString();
426            }
427            //  We must first empty the buffer because the side effect of calling makeCamelCaseLink() is to call this routine.
428            m_plainTextBuf = new StringBuilder(20);
429            try {
430                // This is the heaviest part of parsing, and therefore we can do some optimization here.
431                // 1) Only when the length of the buffer is big enough, we try to do the match
432                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) {
433                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) {
434                        final MatchResult result = m_camelCaseMatcher.getMatch();
435                        final String firstPart = buf.substring( 0, result.beginOffset( 0 ) );
436                        String prefix = result.group( 1 );
437                        if( prefix == null ) {
438                            prefix = "";
439                        }
440
441                        final String camelCase = result.group(2);
442                        final String protocol  = result.group(3);
443                        String uri       = protocol+result.group(4);
444                        buf              = buf.substring(result.endOffset(0));
445
446                        m_currentElement.addContent( firstPart );
447                        //  Check if the user does not wish to do URL or WikiWord expansion
448                        if( prefix.endsWith( "~" ) || prefix.indexOf( '[' ) != -1 ) {
449                            if( prefix.endsWith( "~" ) ) {
450                                if( m_wysiwygEditorMode ) {
451                                    m_currentElement.addContent( "~" );
452                                }
453                                prefix = prefix.substring( 0, prefix.length() - 1 );
454                            }
455                            if( camelCase != null ) {
456                                m_currentElement.addContent( prefix + camelCase );
457                            } else if( protocol != null ) {
458                                m_currentElement.addContent( prefix + uri );
459                            }
460                            continue;
461                        }
462
463                        // Fine, then let's check what kind of link this was and emit the proper elements
464                        if( protocol != null ) {
465                            final char c = uri.charAt( uri.length() - 1 );
466                            if( c == '.' || c == ',' ) {
467                                uri = uri.substring( 0, uri.length() - 1 );
468                                buf = c + buf;
469                            }
470                            // System.out.println("URI match "+uri);
471                            m_currentElement.addContent( prefix );
472                            makeDirectURILink( uri );
473                        } else {
474                            // System.out.println("Matched: '"+camelCase+"'");
475                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
476                            // System.out.println("prefix="+prefix);
477                            m_currentElement.addContent( prefix );
478                            makeCamelCaseLink( camelCase );
479                        }
480                    }
481                    m_currentElement.addContent( buf );
482                } else {
483                    //  No camelcase asked for, just add the elements
484                    m_currentElement.addContent( buf );
485                }
486            } catch( final IllegalDataException e ) {
487                // Sometimes it's possible that illegal XML chars is added to the data. Here we make sure it does not stop parsing.
488                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
489            }
490        }
491
492        return numChars;
493    }
494
495    /**
496     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape entities that are already escaped).
497     *
498     *  @param buf
499     *  @return An escaped string.
500     */
501    private String escapeHTMLEntities( final String buf ) {
502        final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
503        for( int i = 0; i < buf.length(); i++ ) {
504            final char ch = buf.charAt(i);
505            if( ch == '<' ) {
506                tmpBuf.append("&lt;");
507            } else if( ch == '>' ) {
508                tmpBuf.append("&gt;");
509            } else if( ch == '\"' ) {
510                tmpBuf.append("&quot;");
511            } else if( ch == '&' ) {
512                // If the following is an XML entity reference (&#.*;) we'll leave it as it is; otherwise we'll replace it with an &amp;
513                boolean isEntity = false;
514                final StringBuilder entityBuf = new StringBuilder();
515                if( i < buf.length() -1 ) {
516                    for( int j = i; j < buf.length(); j++ ) {
517                        final char ch2 = buf.charAt( j );
518                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) {
519                            entityBuf.append(ch2);
520                            if( ch2 == ';' ) {
521                                isEntity = true;
522                                break;
523                            }
524                        } else {
525                            break;
526                        }
527                    }
528                }
529
530                if( isEntity ) {
531                    tmpBuf.append( entityBuf );
532                    i = i + entityBuf.length() - 1;
533                } else {
534                    tmpBuf.append( "&amp;" );
535                }
536
537            } else {
538                tmpBuf.append( ch );
539            }
540        }
541
542        return tmpBuf.toString();
543    }
544
545    private Element pushElement( final Element e ) {
546        flushPlainText();
547        m_currentElement.addContent( e );
548        m_currentElement = e;
549
550        return e;
551    }
552
553    private Element addElement( final Content e ) {
554        if( e != null ) {
555            flushPlainText();
556            m_currentElement.addContent( e );
557        }
558        return m_currentElement;
559    }
560
561    /**
562     *  All elements that can be empty by the HTML DTD.
563     */
564    //  Keep sorted.
565    private static final String[] EMPTY_ELEMENTS = {
566        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
567    };
568
569    /**
570     *  Goes through the current element stack and pops all elements until this
571     *  element is found - this essentially "closes" and element.
572     *
573     *  @param s element to be found.
574     *  @return The new current element, or null, if there was no such element in the entire stack.
575     */
576    private Element popElement( final String s ) {
577        final int flushedBytes = flushPlainText();
578        Element currEl = m_currentElement;
579        while( currEl.getParentElement() != null ) {
580            if( currEl.getName().equals( s ) && !currEl.isRootElement() ) {
581                m_currentElement = currEl.getParentElement();
582
583                //  Check if it's okay for this element to be empty.  Then we will
584                //  trick the JDOM generator into not generating an empty element,
585                //  by putting an empty string between the tags.  Yes, it's a kludge
586                //  but what'cha gonna do about it. :-)
587                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) {
588                    currEl.addContent( "" );
589                }
590                return m_currentElement;
591            }
592            currEl = currEl.getParentElement();
593        }
594        return null;
595    }
596
597
598    /**
599     * Reads the stream until it meets one of the specified ending characters, or stream end. The ending
600     * character will be left in the stream.
601     */
602    private String readUntil( final String endChars ) throws IOException {
603        final StringBuilder sb = new StringBuilder( 80 );
604        int ch = nextToken();
605        while( ch != -1 ) {
606            if( ch == '\\' ) {
607                ch = nextToken();
608                if( ch == -1 ) {
609                    break;
610                }
611            } else {
612                if( endChars.indexOf( ( char )ch ) != -1 ) {
613                    pushBack( ch );
614                    break;
615                }
616            }
617            sb.append( ( char )ch );
618            ch = nextToken();
619        }
620
621        return sb.toString();
622    }
623
624    /**
625     *  Reads the stream while the characters that have been specified are
626     *  in the stream, returning then the result as a String.
627     */
628    private String readWhile( final String endChars ) throws IOException {
629        final StringBuilder sb = new StringBuilder( 80 );
630        int ch = nextToken();
631        while( ch != -1 ) {
632            if( endChars.indexOf( ( char ) ch ) == -1 ) {
633                pushBack( ch );
634                break;
635            }
636            sb.append( ( char ) ch );
637            ch = nextToken();
638        }
639
640        return sb.toString();
641    }
642
643    private JSPWikiMarkupParser m_cleanTranslator;
644
645    /**
646     *  Does a lazy init.  Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before
647     *  the TranslatorReader it is contained by is up.
648     */
649    private JSPWikiMarkupParser getCleanTranslator() {
650        if( m_cleanTranslator == null ) {
651            final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() );
652            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
653            m_cleanTranslator.m_allowHTML = true;
654        }
655
656        return m_cleanTranslator;
657    }
658
659    /**
660     *  Modifies the "hd" parameter to contain proper values.  Because
661     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
662     *  % after url encoding with '_'.
663     *  <p>
664     *  Counts also duplicate headings (= headings with similar name), and
665     *  attaches a counter.
666     */
667    protected String makeHeadingAnchor( final String baseName, String title, final Heading hd ) {
668        hd.m_titleText = title;
669        title = MarkupParser.wikifyLink( title );
670        hd.m_titleSection = m_engine.encodeName(title);
671        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) {
672            final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1;
673            m_titleSectionCounter.put( hd.m_titleSection, count );
674            hd.m_titleSection += "-" + count;
675        } else {
676            m_titleSectionCounter.put( hd.m_titleSection, 1 );
677        }
678
679        hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection;
680        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
681        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
682
683        return hd.m_titleAnchor;
684    }
685
686    private String makeSectionTitle( String title ) {
687        title = title.trim();
688        try {
689            final JSPWikiMarkupParser dtr = getCleanTranslator();
690            dtr.setInputReader( new StringReader( title ) );
691            final WikiDocument doc = dtr.parse();
692            doc.setContext( m_context );
693
694            return XmlUtil.extractTextFromDocument( doc );
695        } catch( final IOException e ) {
696            LOG.fatal("Title parsing not working", e );
697            throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e );
698        }
699    }
700
701    /**
702     *  Returns XHTML for the heading.
703     *
704     *  @param level The level of the heading.  @see Heading
705     *  @param title the title for the heading
706     *  @param hd a List to which heading should be added
707     *  @return An Element containing the heading
708     */
709    public Element makeHeading( final int level, final String title, final Heading hd ) {
710        final Element el;
711        final String pageName = m_context.getPage().getName();
712        final String outTitle = makeSectionTitle( title );
713        hd.m_level = level;
714
715        switch( level ) {
716          case Heading.HEADING_SMALL:
717            el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
718            break;
719
720          case Heading.HEADING_MEDIUM:
721            el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
722            break;
723
724          case Heading.HEADING_LARGE:
725            el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
726            break;
727
728          default:
729            throw new InternalWikiException( "Illegal heading type " + level );
730        }
731
732        return el;
733    }
734
735    /**
736     *  When given a link to a WikiName, we just return a proper HTML link for it.  The local link mutator
737     *  chain is also called.
738     */
739    private Element makeCamelCaseLink( final String wikiname ) {
740        final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
741        callMutatorChain( m_localLinkMutatorChain, wikiname );
742        if( matchedLink != null ) {
743            makeLink( READ, matchedLink, wikiname, null, null );
744        } else {
745            makeLink( EDIT, wikiname, wikiname, null, null );
746        }
747
748        return m_currentElement;
749    }
750
751    /** Holds the image URL for the duration of this parser */
752    private String m_outlinkImageURL;
753
754    /**
755     * Returns an element for the external link image (out.png).  However, this method caches the URL for the lifetime
756     * of this MarkupParser, because it's commonly used, and we'll end up with possibly hundreds our thousands of
757     * references to it...  It's a lot faster, too.
758     *
759     * @return  An element containing the HTML for the outlink image.
760     */
761    private Element outlinkImage() {
762        Element el = null;
763        if( m_useOutlinkImage ) {
764            if( m_outlinkImageURL == null ) {
765                m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE );
766            }
767
768            el = new Element( "img" ).setAttribute( "class", OUTLINK );
769            el.setAttribute( "src", m_outlinkImageURL );
770            el.setAttribute( "alt","" );
771        }
772
773        return el;
774    }
775
776    /**
777     *  Takes a URL and turns it into a regular wiki link. Unfortunately, because of the way that flushPlainText()
778     *  works, it already encodes all the XML entities. But so does WikiContext.getURL(), so we
779     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
780     *  <p>
781     *  What a crappy problem.
782     *
783     * @param url provided url.
784     * @return An anchor Element containing the link.
785     */
786    private Element makeDirectURILink( String url ) {
787        final Element result;
788        String last = null;
789
790        if( url.endsWith( "," ) || url.endsWith( "." ) ) {
791            last = url.substring( url.length() - 1 );
792            url = url.substring( 0, url.length() - 1 );
793        }
794
795        callMutatorChain( m_externalLinkMutatorChain, url );
796
797        if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) {
798            result = handleImageLink( StringUtils.replace( url, "&amp;", "&" ), url, false );
799        } else {
800            result = makeLink( EXTERNAL, StringUtils.replace( url, "&amp;", "&" ), url, null, null );
801            addElement( outlinkImage() );
802        }
803
804        if( last != null ) {
805            m_plainTextBuf.append( last );
806        }
807
808        return result;
809    }
810
811    /**
812     *  Image links are handled differently:
813     *  1. If the text is a WikiName of an existing page, it gets linked.
814     *  2. If the text is an external link, then it is inlined.
815     *  3. Otherwise, it becomes an ALT text.
816     *
817     *  @param reallink The link to the image.
818     *  @param link     Link text portion, may be a link to somewhere else.
819     *  @param hasLinkText If true, then the defined link had a link text available.
820     *                  This means that the link text may be a link to a wiki page,
821     *                  or an external resource.
822     */
823    private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) {
824        final String possiblePage = MarkupParser.cleanLink( link );
825        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) {
826            return makeLink( IMAGELINK, reallink, link, null, null );
827        } else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) {
828            callMutatorChain( m_localLinkMutatorChain, possiblePage );
829            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
830        } else {
831            return makeLink( IMAGE, reallink, link, null, null );
832        }
833    }
834
835    private Element handleAccessRule( String ruleLine ) {
836        if( m_wysiwygEditorMode ) {
837            m_currentElement.addContent( "[" + ruleLine + "]" );
838        }
839        if( !m_parseAccessRules ) {
840            return m_currentElement;
841        }
842        final Page page = m_context.getRealPage();
843        // UserDatabase db = m_context.getEngine().getUserDatabase();
844
845        if( ruleLine.startsWith( "{" ) ) {
846            ruleLine = ruleLine.substring( 1 );
847        }
848
849        if( ruleLine.endsWith( "}" ) ) {
850            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
851        }
852
853        LOG.debug("page={}, ACL = {}", page.getName(), ruleLine);
854
855        try {
856            final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine );
857            page.setAcl( acl );
858            LOG.debug( acl.toString() );
859        } catch( final WikiSecurityException wse ) {
860            return makeError( wse.getMessage() );
861        }
862
863        return m_currentElement;
864    }
865
866    /**
867     *  Handles metadata setting [{SET foo=bar}]
868     */
869    private Element handleMetadata( final String link ) {
870        if( m_wysiwygEditorMode ) {
871            m_currentElement.addContent( "[" + link + "]" );
872        }
873
874        try {
875            final String args = link.substring( link.indexOf(' '), link.length()-1 );
876            final String name = args.substring( 0, args.indexOf('=') ).trim();
877            String val  = args.substring( args.indexOf('=')+1 ).trim();
878
879            if( val.startsWith("'") ) {
880                val = val.substring( 1 );
881            }
882            if( val.endsWith("'") ) {
883                val = val.substring( 0, val.length()-1 );
884            }
885
886            // LOG.debug("SET name='"+name+"', value='"+val+"'.");
887
888            if( !name.isEmpty() && !val.isEmpty() ) {
889                val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val );
890                m_context.getPage().setAttribute( name, val );
891            }
892        } catch( final Exception e ) {
893            final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
894            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
895        }
896
897        return m_currentElement;
898    }
899
900    /**
901     *  Emits a processing instruction that will disable markup escaping. This is
902     *  very useful if you want to emit HTML directly into the stream.
903     */
904    private void disableOutputEscaping() {
905        addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) );
906    }
907
908    /**
909     *  Gobbles up all hyperlinks that are encased in square brackets.
910     */
911    private Element handleHyperlinks( String linktext, final int pos ) {
912        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
913        final StringBuilder sb = new StringBuilder( linktext.length() + 80 );
914
915        if( m_linkParsingOperations.isAccessRule( linktext ) ) {
916            return handleAccessRule( linktext );
917        }
918
919        if( m_linkParsingOperations.isMetadata( linktext ) ) {
920            return handleMetadata( linktext );
921        }
922
923        if( m_linkParsingOperations.isPluginLink( linktext ) ) {
924            try {
925                final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
926
927                // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not.
928                if( pluginContent != null ) {
929                    addElement( pluginContent );
930                    pluginContent.executeParse( m_context );
931                }
932            } catch( final PluginException e ) {
933                LOG.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
934                //LOG.info( "Root cause:",e.getRootThrowable() );
935                if( !m_wysiwygEditorMode ) {
936                    final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE );
937                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
938                                                                        m_context.getRealPage().getWiki(),
939                                                                        m_context.getRealPage().getName(),
940                                                                        e.getMessage() ) ) );
941                }
942            }
943            return m_currentElement;
944        }
945
946        try {
947            final LinkParser.Link link = m_linkParser.parse( linktext );
948            linktext = link.getText();
949            String linkref = link.getReference();
950            //  Yes, we now have the components separated.
951            //  linktext = the text the link should have
952            //  linkref  = the url or page name.
953            //  In many cases these are the same.  [linktext|linkref].
954            if( m_linkParsingOperations.isVariableLink( linktext ) ) {
955                final Content el = new VariableContent( linktext );
956                addElement( el );
957            } else if( m_linkParsingOperations.isExternalLink( linkref ) ) {
958                // It's an external link, out of this Wiki
959                callMutatorChain( m_externalLinkMutatorChain, linkref );
960                if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
961                    handleImageLink( linkref, linktext, link.hasReference() );
962                } else {
963                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
964                    addElement( outlinkImage() );
965                }
966            } else if( link.isInterwikiLink() ) {
967                // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved.
968
969                // FIXME: There is an interesting issue here:  We probably should
970                //        URLEncode the wikiPage, but we can't since some of the
971                //        Wikis use slashes (/), which won't survive URLEncoding.
972                //        Besides, we don't know which character set the other Wiki
973                //        is using, so you'll have to write the entire name as it appears
974                //        in the URL.  Bugger.
975
976                final String extWiki = link.getExternalWiki();
977                final String wikiPage = link.getExternalWikiPage();
978                if( m_wysiwygEditorMode ) {
979                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
980                } else {
981                    String urlReference = m_engine.getInterWikiURL( extWiki );
982                    if( urlReference != null ) {
983                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
984                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
985
986                        if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) {
987                            handleImageLink( urlReference, linktext, link.hasReference() );
988                        } else {
989                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
990                        }
991                        if( m_linkParsingOperations.isExternalLink( urlReference ) ) {
992                            addElement( outlinkImage() );
993                        }
994                    } else {
995                        final Object[] args = { escapeHTMLEntities( extWiki ) };
996                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
997                    }
998                }
999            } else if( linkref.startsWith( "#" ) ) {
1000                // It defines a local footnote
1001                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1002            } else if( TextUtil.isNumber( linkref ) ) {
1003                // It defines a reference to a local footnote
1004                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1005            } else {
1006                final int hashMark;
1007
1008                // Internal wiki link, but is it an attachment link?
1009                String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref );
1010                if( attachment != null ) {
1011                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1012                    if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
1013                        attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment );
1014                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1015                    } else {
1016                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1017                    }
1018                } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) {
1019                    // It's an internal Wiki link, but to a named section
1020                    final String namedSection = linkref.substring( hashMark + 1 );
1021                    linkref = linkref.substring( 0, hashMark );
1022                    linkref = MarkupParser.cleanLink( linkref );
1023                    callMutatorChain( m_localLinkMutatorChain, linkref );
1024                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1025                    if( matchedLink != null ) {
1026                        String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) );
1027                        sectref = sectref.replace( '%', '_' );
1028                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1029                    } else {
1030                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1031                    }
1032                } else {
1033                    // It's an internal Wiki link
1034                    linkref = MarkupParser.cleanLink( linkref );
1035                    callMutatorChain( m_localLinkMutatorChain, linkref );
1036                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1037                    if( matchedLink != null ) {
1038                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1039                    } else {
1040                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1041                    }
1042                }
1043            }
1044
1045        } catch( final ParseException e ) {
1046            LOG.info( "Parser failure: ", e );
1047            final Object[] args = { e.getMessage() };
1048            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1049        }
1050        return m_currentElement;
1051    }
1052
1053    /**
1054     *  Pushes back any string that has been read.  It will obviously be pushed back in a reverse order.
1055     *
1056     *  @since 2.1.77
1057     */
1058    private void pushBack( final String s ) throws IOException {
1059        for( int i = s.length()-1; i >= 0; i-- ) {
1060            pushBack( s.charAt(i) );
1061        }
1062    }
1063
1064    private Element handleBackslash() throws IOException {
1065        final int ch = nextToken();
1066        if( ch == '\\' ) {
1067            final int ch2 = nextToken();
1068            if( ch2 == '\\' ) {
1069                pushElement( new Element( "br" ).setAttribute( "clear", "all" ) );
1070                return popElement( "br" );
1071            }
1072            pushBack( ch2 );
1073            pushElement( new Element( "br" ) );
1074            return popElement( "br" );
1075        }
1076        pushBack( ch );
1077        return null;
1078    }
1079
1080    private Element handleUnderscore() throws IOException {
1081        final int ch = nextToken();
1082        Element el = null;
1083        if( ch == '_' ) {
1084            if( m_isbold ) {
1085                el = popElement( "b" );
1086            } else {
1087                el = pushElement( new Element( "b" ) );
1088            }
1089            m_isbold = !m_isbold;
1090        } else {
1091            pushBack( ch );
1092        }
1093
1094        return el;
1095    }
1096
1097
1098    /**
1099     *  For example: italics.
1100     */
1101    private Element handleApostrophe() throws IOException {
1102        final int ch = nextToken();
1103        Element el = null;
1104
1105        if( ch == '\'' ) {
1106            if( m_isitalic ) {
1107                el = popElement( "i" );
1108            } else {
1109                el = pushElement( new Element( "i" ) );
1110            }
1111            m_isitalic = !m_isitalic;
1112        } else {
1113            pushBack( ch );
1114        }
1115
1116        return el;
1117    }
1118
1119    private Element handleOpenbrace( final boolean isBlock ) throws IOException {
1120        final int ch = nextToken();
1121        if( ch == '{' ) {
1122            final int ch2 = nextToken();
1123            if( ch2 == '{' ) {
1124                m_isPre = true;
1125                m_isEscaping = true;
1126                m_isPreBlock = isBlock;
1127                if( isBlock ) {
1128                    startBlockLevel();
1129                    return pushElement( new Element( "pre" ) );
1130                }
1131
1132                return pushElement( new Element( "span" ).setAttribute( "class", "inline-code" ) );
1133            }
1134            pushBack( ch2 );
1135            return pushElement( new Element( "tt" ) );
1136        }
1137        pushBack( ch );
1138        return null;
1139    }
1140
1141    /**
1142     *  Handles both }} and }}}
1143     */
1144    private Element handleClosebrace() throws IOException {
1145        final int ch2 = nextToken();
1146        if( ch2 == '}' ) {
1147            final int ch3 = nextToken();
1148            if( ch3 == '}' ) {
1149                if( m_isPre ) {
1150                    if( m_isPreBlock ) {
1151                        popElement( "pre" );
1152                    } else {
1153                        popElement( "span" );
1154                    }
1155                    m_isPre = false;
1156                    m_isEscaping = false;
1157                    return m_currentElement;
1158                }
1159                m_plainTextBuf.append( "}}}" );
1160                return m_currentElement;
1161            }
1162            pushBack( ch3 );
1163            if( !m_isEscaping ) {
1164                return popElement( "tt" );
1165            }
1166        }
1167        pushBack( ch2 );
1168        return null;
1169    }
1170
1171    private Element handleDash() throws IOException {
1172        int ch = nextToken();
1173        if( ch == '-' ) {
1174            final int ch2 = nextToken();
1175            if( ch2 == '-' ) {
1176                final int ch3 = nextToken();
1177                if( ch3 == '-' ) {
1178                    // Empty away all the rest of the dashes.
1179                    // Do not forget to return the first non-match back.
1180                    do {
1181                        ch = nextToken();
1182                    } while ( ch == '-' );
1183
1184                    pushBack( ch );
1185                    startBlockLevel();
1186                    pushElement( new Element( "hr" ) );
1187                    return popElement( "hr" );
1188                }
1189                pushBack( ch3 );
1190            }
1191            pushBack( ch2 );
1192        }
1193        pushBack( ch );
1194        return null;
1195    }
1196
1197    private Element handleHeading() throws IOException {
1198        final Element el;
1199        final int ch  = nextToken();
1200        final Heading hd = new Heading();
1201        if( ch == '!' ) {
1202            final int ch2 = nextToken();
1203            if( ch2 == '!' ) {
1204                final String title = peekAheadLine();
1205                el = makeHeading( Heading.HEADING_LARGE, title, hd );
1206            } else {
1207                pushBack( ch2 );
1208                final String title = peekAheadLine();
1209                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1210            }
1211        } else {
1212            pushBack( ch );
1213            final String title = peekAheadLine();
1214            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1215        }
1216
1217        callHeadingListenerChain( hd );
1218        m_lastHeading = hd;
1219        if( el != null ) {
1220            pushElement( el );
1221        }
1222        return el;
1223    }
1224
1225    /**
1226     * Reads the stream until the next EOL or EOF.  Note that it will also read the EOL from the stream.
1227     */
1228    private StringBuilder readUntilEOL() throws IOException {
1229        int ch;
1230        final StringBuilder buf = new StringBuilder( 256 );
1231        while( true ) {
1232            ch = nextToken();
1233            if( ch == -1 ) {
1234                break;
1235            }
1236            buf.append( (char) ch );
1237            if( ch == '\n' ) {
1238                break;
1239            }
1240        }
1241        return buf;
1242    }
1243
1244    /** Controls whether italic is restarted after a paragraph shift */
1245
1246    private boolean m_restartitalic;
1247    private boolean m_restartbold;
1248
1249    private boolean m_newLine;
1250
1251    /**
1252     * Starts a block level element, therefore closing a potential open paragraph tag.
1253     */
1254    private void startBlockLevel() {
1255        // These may not continue over block level limits in XHTML
1256        popElement( "i" );
1257        popElement( "b" );
1258        popElement( "tt" );
1259        if( m_isOpenParagraph ) {
1260            m_isOpenParagraph = false;
1261            popElement( "p" );
1262            m_plainTextBuf.append( "\n" ); // Just small beautification
1263        }
1264        m_restartitalic = m_isitalic;
1265        m_restartbold   = m_isbold;
1266        m_isitalic = false;
1267        m_isbold   = false;
1268    }
1269
1270    private static String getListType( final char c ) {
1271        if( c == '*' ) {
1272            return "ul";
1273        } else if( c == '#' ) {
1274            return "ol";
1275        }
1276        throw new InternalWikiException( "Parser got faulty list type: " + c );
1277    }
1278    /**
1279     * Like original handleOrderedList() and handleUnorderedList(),
1280     * however handles both ordered ('#') and unordered ('*') mixed together.
1281     */
1282    // FIXME: Refactor this; it's a bit messy.
1283    private Element handleGeneralList() throws IOException {
1284         startBlockLevel();
1285         String strBullets = readWhile( "*#" );
1286         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1287         final int numBullets = strBullets.length();
1288
1289         // override the beginning portion of bullet pattern to be like the previous to simulate PHPWiki style lists
1290
1291        if( m_allowPHPWikiStyleLists ) {
1292            // only substitute if different
1293            if( !( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) ) {
1294                if( numBullets <= m_genlistlevel ) {
1295                    // Substitute all but the last character (keep the expressed bullet preference)
1296                    strBullets = ( numBullets > 1 ? m_genlistBulletBuffer.substring( 0, numBullets - 1 ) : "" ) +
1297                                 strBullets.charAt( numBullets - 1 );
1298                } else {
1299                    strBullets = m_genlistBulletBuffer + strBullets.substring( m_genlistlevel, numBullets );
1300                }
1301            }
1302        }
1303
1304         //  Check if this is still of the same type
1305        if( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) {
1306            if( numBullets > m_genlistlevel ) {
1307                pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel++ ) ) ) );
1308                for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) {
1309                    // bullets are growing, get from new bullet list
1310                    pushElement( new Element( "li" ) );
1311                    pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel ) ) ) );
1312                }
1313            } else if( numBullets < m_genlistlevel ) {
1314                //  Close the previous list item.
1315                popElement( "li" );
1316                for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) {
1317                    // bullets are shrinking, get from old bullet list
1318                    popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1319                    if( m_genlistlevel > 0 ) {
1320                        popElement( "li" );
1321                    }
1322                }
1323            } else {
1324                if( m_genlistlevel > 0 ) {
1325                    popElement( "li" );
1326                }
1327            }
1328        } else {
1329            //  The pattern has changed, unwind and restart
1330            int numEqualBullets;
1331            final int numCheckBullets;
1332
1333            // find out how much is the same
1334            numEqualBullets = 0;
1335            numCheckBullets = Math.min( numBullets, m_genlistlevel );
1336
1337            while( numEqualBullets < numCheckBullets ) {
1338                // if the bullets are equal so far, keep going
1339                if( strBullets.charAt( numEqualBullets ) == m_genlistBulletBuffer.charAt( numEqualBullets ) )
1340                    numEqualBullets++;
1341                    // otherwise giveup, we have found how many are equal
1342                else
1343                    break;
1344            }
1345
1346            //unwind
1347            for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) {
1348                popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1349                if( m_genlistlevel > numBullets ) {
1350                    popElement( "li" );
1351                }
1352            }
1353
1354            //rewind
1355            pushElement( new Element( getListType( strBullets.charAt( numEqualBullets++ ) ) ) );
1356            for( int i = numEqualBullets; i < numBullets; i++ ) {
1357                pushElement( new Element( "li" ) );
1358                pushElement( new Element( getListType( strBullets.charAt( i ) ) ) );
1359            }
1360            m_genlistlevel = numBullets;
1361        }
1362
1363         // Push a new list item, and eat away any extra whitespace
1364        pushElement( new Element( "li" ) );
1365        readWhile( " " );
1366
1367        // work done, remember the new bullet list (in place of old one)
1368        m_genlistBulletBuffer.setLength( 0 );
1369        m_genlistBulletBuffer.append( strBullets );
1370        return m_currentElement;
1371    }
1372
1373    private Element unwindGeneralList() {
1374        // unwind
1375        for( ; m_genlistlevel > 0; m_genlistlevel-- ) {
1376            popElement( "li" );
1377            popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1378        }
1379        m_genlistBulletBuffer.setLength( 0 );
1380        return null;
1381    }
1382
1383
1384    private Element handleDefinitionList() {
1385        if( !m_isdefinition ) {
1386            m_isdefinition = true;
1387            startBlockLevel();
1388            pushElement( new Element( "dl" ) );
1389            return pushElement( new Element( "dt" ) );
1390        }
1391        return null;
1392    }
1393
1394    private Element handleOpenbracket() throws IOException {
1395        final StringBuilder sb = new StringBuilder( 40 );
1396        final int pos = getPosition();
1397        int ch = nextToken();
1398        boolean isPlugin = false;
1399        if( ch == '[' ) {
1400            if( m_wysiwygEditorMode ) {
1401                sb.append( '[' );
1402            }
1403            sb.append( ( char )ch );
1404            while( ( ch = nextToken() ) == '[' ) {
1405                sb.append( ( char )ch );
1406            }
1407        }
1408
1409        if( ch == '{' ) {
1410            isPlugin = true;
1411        }
1412
1413        pushBack( ch );
1414
1415        if( sb.length() > 0 ) {
1416            m_plainTextBuf.append( sb );
1417            return m_currentElement;
1418        }
1419
1420        //  Find end of hyperlink
1421        ch = nextToken();
1422        int nesting = 1; // Check for nested plugins
1423        while( ch != -1 ) {
1424            final int ch2 = nextToken();
1425            pushBack( ch2 );
1426            if( isPlugin ) {
1427                if( ch == '[' && ch2 == '{' ) {
1428                    nesting++;
1429                } else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) {
1430                    break;
1431                } else if( ch == '}' && ch2 == ']' ) {
1432                    // NB: This will be decremented once at the end
1433                    nesting--;
1434                }
1435            } else {
1436                if( ch == ']' ) {
1437                    break;
1438                }
1439            }
1440
1441            sb.append( (char) ch );
1442
1443            ch = nextToken();
1444        }
1445
1446        //  If the link is never finished, do some tricks to display the rest of the line unchanged.
1447        if( ch == -1 ) {
1448            LOG.debug( "Warning: unterminated link detected!" );
1449            m_isEscaping = true;
1450            m_plainTextBuf.append( sb );
1451            flushPlainText();
1452            m_isEscaping = false;
1453            return m_currentElement;
1454        }
1455
1456        return handleHyperlinks( sb.toString(), pos );
1457    }
1458
1459    /**
1460     *  Reads the stream until the current brace is closed or stream end.
1461     */
1462    private String readBraceContent( final char opening, final char closing ) throws IOException {
1463        final StringBuilder sb = new StringBuilder( 40 );
1464        int braceLevel = 1;
1465        int ch;
1466        while( ( ch = nextToken() ) != -1 ) {
1467            if( ch == '\\' ) {
1468                continue;
1469            } else if( ch == opening ) {
1470                braceLevel++;
1471            } else if( ch == closing ) {
1472                braceLevel--;
1473                if( braceLevel == 0 ) {
1474                    break;
1475                }
1476            }
1477            sb.append( ( char ) ch );
1478        }
1479        return sb.toString();
1480    }
1481
1482
1483    /**
1484     * Handles constructs of type %%(style) and %%class
1485     * @param newLine
1486     * @return An Element containing the div or span, depending on the situation.
1487     * @throws IOException
1488     */
1489    private Element handleDiv( final boolean newLine ) throws IOException {
1490        int ch = nextToken();
1491        Element el = null;
1492
1493        if( ch == '%' ) {
1494            String style = null;
1495            String clazz = null;
1496
1497            ch = nextToken();
1498
1499            //  Style or class?
1500            if( ch == '(' ) {
1501                style = readBraceContent('(',')');
1502            } else if( Character.isLetter( (char) ch ) ) {
1503                pushBack( ch );
1504                clazz = readUntil( "( \t\n\r" );
1505                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
1506                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
1507                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
1508                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
1509
1510                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
1511                //(2) remove any invalid character
1512                if( clazz != null ) {
1513                    clazz = clazz.replace( '.', ' ' )
1514                                 .replaceAll( "[^\\s-_\\w\\x200-\\x377]+", "" );
1515                }
1516                ch = nextToken();
1517
1518                // check for %%class1.class2( style information )
1519                if( ch == '(' ) {
1520                    style = readBraceContent( '(', ')' );
1521                //  Pop out only spaces, so that the upcoming EOL check does not check the next line.
1522                } else if( ch == '\n' || ch == '\r' ) {
1523                    pushBack( ch );
1524                }
1525            } else {
1526                // Anything else stops.
1527                pushBack( ch );
1528                try {
1529                    final Boolean isSpan = m_styleStack.pop();
1530                    if( isSpan == null ) {
1531                        // Fail quietly
1532                    } else if( isSpan ) {
1533                        el = popElement( "span" );
1534                    } else {
1535                        el = popElement( "div" );
1536                    }
1537                } catch( final EmptyStackException e ) {
1538                    LOG.debug( "Page '" + m_context.getName() + "' closes a %%-block that has not been opened." );
1539                    return m_currentElement;
1540                }
1541                return el;
1542            }
1543
1544            //  Check if there is an attempt to do something nasty
1545            try {
1546                style = StringEscapeUtils.unescapeHtml4(style);
1547                if( style != null && style.contains( "javascript:" ) ) {
1548                    LOG.debug( "Attempt to output javascript within CSS: {}", style );
1549                    final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1550                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
1551                }
1552            } catch( final NumberFormatException e ) {
1553                //  If there are unknown entities, we don't want the parser to stop.
1554                final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1555                final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
1556                return addElement( makeError( msg ) );
1557            }
1558
1559            //  Decide if we should open a div or a span?
1560            final String eol = peekAheadLine();
1561
1562            if( !eol.trim().isEmpty() ) {
1563                // There is stuff after the class
1564                el = new Element("span");
1565                m_styleStack.push( Boolean.TRUE );
1566            } else {
1567                startBlockLevel();
1568                el = new Element("div");
1569                m_styleStack.push( Boolean.FALSE );
1570            }
1571
1572            if( style != null ) el.setAttribute("style", style);
1573            if( clazz != null ) el.setAttribute("class", clazz);
1574            return pushElement( el );
1575        }
1576        pushBack( ch );
1577        return el;
1578    }
1579
1580    private Element handleSlash( final boolean newLine ) throws IOException {
1581        final int ch = nextToken();
1582        pushBack( ch );
1583        if( ch == '%' && !m_styleStack.isEmpty() ) {
1584            return handleDiv( newLine );
1585        }
1586
1587        return null;
1588    }
1589
1590    private Element handleBar( final boolean newLine ) throws IOException {
1591        Element el;
1592        if( !m_istable && !newLine ) {
1593            return null;
1594        }
1595
1596        //  If the bar is in the first column, we will either start a new table or continue the old one.
1597        if( newLine ) {
1598            if( !m_istable ) {
1599                startBlockLevel();
1600                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
1601                m_istable = true;
1602                m_rowNum = 0;
1603            }
1604
1605            m_rowNum++;
1606            final Element tr = ( m_rowNum % 2 != 0 )
1607                       ? new Element("tr").setAttribute("class", "odd")
1608                       : new Element("tr");
1609            el = pushElement( tr );
1610        }
1611
1612        //  Check out which table cell element to start; a header element (th) or a regular element (td).
1613        final int ch = nextToken();
1614        if( ch == '|' ) {
1615            if( !newLine ) {
1616                el = popElement("th");
1617                if( el == null ) popElement("td");
1618            }
1619            el = pushElement( new Element("th") );
1620        } else {
1621            if( !newLine ) {
1622                el = popElement( "td" );
1623                if( el == null ) popElement( "th" );
1624            }
1625            el = pushElement( new Element("td") );
1626            pushBack( ch );
1627        }
1628        return el;
1629    }
1630
1631    /**
1632     *  Generic escape of next character or entity.
1633     */
1634    private Element handleTilde() throws IOException {
1635        final int ch = nextToken();
1636
1637        if( ch == ' ' ) {
1638            if( m_wysiwygEditorMode ) {
1639                m_plainTextBuf.append( "~ " );
1640            }
1641            return m_currentElement;
1642        }
1643
1644        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
1645            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
1646            ch == '{' || ch == ']' || ch == '}' || ch == '%' ) {
1647            if( m_wysiwygEditorMode ) {
1648                m_plainTextBuf.append( '~' );
1649            }
1650            m_plainTextBuf.append( ( char ) ch );
1651            m_plainTextBuf.append( readWhile( "" + ( char ) ch ) );
1652            return m_currentElement;
1653        }
1654        // No escape.
1655        pushBack( ch );
1656        return null;
1657    }
1658
1659    private void fillBuffer( final Element startElement ) throws IOException {
1660        m_currentElement = startElement;
1661        m_newLine = true;
1662        boolean quitReading = false;
1663        disableOutputEscaping();
1664        while( !quitReading ) {
1665            final int ch = nextToken();
1666            if( ch == -1 ) {
1667                break;
1668            }
1669
1670            //  Check if we're actually ending the preformatted mode. We still must do an entity transformation here.
1671            if( m_isEscaping ) {
1672                if( ch == '}' ) {
1673                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
1674                } else if( ch == -1 ) {
1675                    quitReading = true;
1676                }
1677                else if( ch == '\r' ) {
1678                    // DOS line feeds we ignore.
1679                } else if( ch == '<' ) {
1680                    m_plainTextBuf.append( "&lt;" );
1681                } else if( ch == '>' ) {
1682                    m_plainTextBuf.append( "&gt;" );
1683                } else if( ch == '&' ) {
1684                    m_plainTextBuf.append( "&amp;" );
1685                } else if( ch == '~' ) {
1686                    String braces = readWhile( "}" );
1687                    if( braces.length() >= 3 ) {
1688                        m_plainTextBuf.append( "}}}" );
1689                        braces = braces.substring(3);
1690                    } else {
1691                        m_plainTextBuf.append( (char) ch );
1692                    }
1693
1694                    for( int i = braces.length()-1; i >= 0; i-- ) {
1695                        pushBack( braces.charAt( i ) );
1696                    }
1697                } else {
1698                    m_plainTextBuf.append( (char) ch );
1699                }
1700
1701                continue;
1702            }
1703
1704            //  An empty line stops a list
1705            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) {
1706                m_plainTextBuf.append(unwindGeneralList());
1707            }
1708
1709            if( m_newLine && ch != '|' && m_istable ) {
1710                popElement( "table" );
1711                m_istable = false;
1712            }
1713
1714            int skip = IGNORE;
1715            //  Do the actual parsing and catch any errors.
1716            try {
1717                skip = parseToken( ch );
1718            } catch( final IllegalDataException e ) {
1719                LOG.info( "Page {} contains data which cannot be added to DOM tree: {}", m_context.getPage().getName(), e.getMessage() );
1720                makeError( "Error: " + cleanupSuspectData( e.getMessage() ) );
1721            }
1722
1723            // The idea is as follows:  If the handler method returns an element (el != null), it is assumed that it
1724            // has been added in the stack.  Otherwise, the character is added as is to the plaintext buffer.
1725            //
1726            // For the transition phase, if s != null, it also gets added in the plaintext buffer.
1727            switch( skip ) {
1728                case ELEMENT:
1729                    m_newLine = false;
1730                    break;
1731
1732                case CHARACTER:
1733                    m_plainTextBuf.append( (char) ch );
1734                    m_newLine = false;
1735                    break;
1736
1737                case IGNORE:
1738                default:
1739                    break;
1740            }
1741        }
1742
1743        closeHeadings();
1744        popElement( "domroot" );
1745    }
1746
1747    private String cleanupSuspectData( final String s ) {
1748        final StringBuilder sb = new StringBuilder( s.length() );
1749        for( int i = 0; i < s.length(); i++ ) {
1750            final char c = s.charAt(i);
1751            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
1752            else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() );
1753        }
1754
1755        return sb.toString();
1756    }
1757
    /**
     *  Result of {@link #parseToken(int)}: the token is a plain character. The main parsing loop
     *  appends it to the plain text buffer and marks the line as no longer new.
     */
    protected static final int CHARACTER = 0;

    /**
     *  Result of {@link #parseToken(int)}: the token is a wikimarkup element. The main parsing loop
     *  does not add the character as text, and marks the line as no longer new.
     */
    protected static final int ELEMENT   = 1;

    /**
     *  Result of {@link #parseToken(int)}: the token is to be ignored. The main parsing loop neither
     *  adds the character as text nor changes the new-line state.
     */
    protected static final int IGNORE    = 2;
1766
1767    /**
1768     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
1769     *  you think this was a wiki markup element, and IGNORE, if you think
1770     *  we should ignore this altogether.
1771     *  <p>
1772     *  To add your own MarkupParser, you can override this method, but it
1773     *  is recommended that you call super.parseToken() as well to gain advantage
1774     *  of JSPWiki's own markup.  You can call it at the start of your own
1775     *  parseToken() or end - it does not matter.
1776     *
1777     * @param ch The character under investigation
1778     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
1779     * @throws IOException If parsing fails.
1780     */
1781    protected int parseToken( final int ch ) throws IOException {
1782        Element el = null;
1783        //  Now, check the incoming token.
1784        switch( ch ) {
1785          case '\r':
1786            // DOS linefeeds we forget
1787            return IGNORE;
1788
1789          case '\n':
1790            //  Close things like headings, etc.
1791            // FIXME: This is not really very fast
1792            closeHeadings();
1793
1794            popElement( "dl" ); // Close definition lists.
1795            if( m_istable ) {
1796                popElement("tr");
1797            }
1798            m_isdefinition = false;
1799            if( m_newLine ) {
1800                // Paragraph change.
1801                startBlockLevel();
1802                //  Figure out which elements cannot be enclosed inside a <p></p> pair according to XHTML rules.
1803                final String nextLine = peekAheadLine();
1804                if( nextLine.isEmpty() ||
1805                     ( !nextLine.isEmpty() &&
1806                       !nextLine.startsWith( "{{{" ) &&
1807                       !nextLine.startsWith( "----" ) &&
1808                       !nextLine.startsWith( "%%" ) &&
1809                       "*#!;".indexOf( nextLine.charAt( 0 ) ) == -1 ) ) {
1810                    pushElement( new Element( "p" ) );
1811                    m_isOpenParagraph = true;
1812
1813                    if( m_restartitalic ) {
1814                        pushElement( new Element( "i" ) );
1815                        m_isitalic = true;
1816                        m_restartitalic = false;
1817                    }
1818                    if( m_restartbold ) {
1819                        pushElement( new Element( "b" ) );
1820                        m_isbold = true;
1821                        m_restartbold = false;
1822                    }
1823                }
1824            } else {
1825                m_plainTextBuf.append("\n");
1826                m_newLine = true;
1827            }
1828            return IGNORE;
1829
1830          case '\\':
1831            el = handleBackslash();
1832            break;
1833
1834          case '_':
1835            el = handleUnderscore();
1836            break;
1837
1838          case '\'':
1839            el = handleApostrophe();
1840            break;
1841
1842          case '{':
1843            el = handleOpenbrace( m_newLine );
1844            break;
1845
1846          case '}':
1847            el = handleClosebrace();
1848            break;
1849
1850          case '-':
1851            if( m_newLine ) {
1852                el = handleDash();
1853            }
1854            break;
1855
1856          case '!':
1857            if( m_newLine ) {
1858                el = handleHeading();
1859            }
1860            break;
1861
1862          case ';':
1863            if( m_newLine ) {
1864                el = handleDefinitionList();
1865            }
1866            break;
1867
1868          case ':':
1869            if( m_isdefinition ) {
1870                popElement( "dt" );
1871                el = pushElement( new Element( "dd" ) );
1872                m_isdefinition = false;
1873            }
1874            break;
1875
1876          case '[':
1877            el = handleOpenbracket();
1878            break;
1879
1880          case '*':
1881            if( m_newLine ) {
1882                pushBack( '*' );
1883                el = handleGeneralList();
1884            }
1885            break;
1886
1887          case '#':
1888            if( m_newLine ) {
1889                pushBack( '#' );
1890                el = handleGeneralList();
1891            }
1892            break;
1893
1894          case '|':
1895            el = handleBar( m_newLine );
1896            break;
1897
1898          case '~':
1899            el = handleTilde();
1900            break;
1901
1902          case '%':
1903            el = handleDiv( m_newLine );
1904            break;
1905
1906          case '/':
1907            el = handleSlash( m_newLine );
1908            break;
1909
1910          default:
1911            break;
1912        }
1913
1914        return el != null ? ELEMENT : CHARACTER;
1915    }
1916
1917    private void closeHeadings() {
1918        if( m_lastHeading != null && !m_wysiwygEditorMode ) {
1919            // Add the hash anchor element at the end of the heading
1920            addElement( new Element("a").setAttribute( "class",HASHLINK )
1921                                              .setAttribute( "href","#" + m_lastHeading.m_titleAnchor )
1922                                              .setText( "#" ) );
1923            m_lastHeading = null;
1924        }
1925        popElement( "h2" );
1926        popElement( "h3" );
1927        popElement( "h4" );
1928    }
1929
1930    /**
1931     *  Parses the entire document from the Reader given in the constructor or set by {@link #setInputReader(Reader)}.
1932     *
1933     *  @return A WikiDocument, ready to be passed to the renderer.
1934     *  @throws IOException If parsing cannot be accomplished.
1935     */
1936    @Override
1937    public WikiDocument parse() throws IOException {
1938        final WikiDocument d = new WikiDocument( m_context.getPage() );
1939        d.setContext( m_context );
1940        final Element rootElement = new Element( "domroot" );
1941        d.setRootElement( rootElement );
1942        fillBuffer( rootElement );
1943        paragraphify( rootElement );
1944
1945        return d;
1946    }
1947
1948    /**
1949     *  Checks out that the first paragraph is correctly installed.
1950     *
1951     *  @param rootElement element to be checked.
1952     */
1953    private void paragraphify( final Element rootElement) {
1954        //  Add the paragraph tag to the first paragraph
1955        final List< Content > kids = rootElement.getContent();
1956        if( rootElement.getChild( "p" ) != null ) {
1957            final ArrayList<Content> ls = new ArrayList<>();
1958            int idxOfFirstContent = 0;
1959            int count = 0;
1960
1961            for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) {
1962                final Content c = i.next();
1963                if( c instanceof Element ) {
1964                    final String name = ( ( Element )c ).getName();
1965                    if( isBlockLevel( name ) ) {
1966                        break;
1967                    }
1968                }
1969
1970                if( !( c instanceof ProcessingInstruction ) ) {
1971                    ls.add( c );
1972                    if( idxOfFirstContent == 0 ) {
1973                        idxOfFirstContent = count;
1974                    }
1975                }
1976            }
1977
1978            //  If there were any elements, then add a new <p> (unless it would be an empty one)
1979            if( ls.size() > 0 ) {
1980                final Element newel = new Element("p");
1981                for( final Content c : ls ) {
1982                    c.detach();
1983                    newel.addContent( c );
1984                }
1985
1986                // Make sure there are no empty <p/> tags added.
1987                if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() ) {
1988                    rootElement.addContent( idxOfFirstContent, newel );
1989                }
1990            }
1991        }
1992    }
1993
1994}