Source code

001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import org.apache.commons.lang3.StringUtils;
022import org.apache.commons.text.StringEscapeUtils;
023import org.apache.logging.log4j.LogManager;
024import org.apache.logging.log4j.Logger;
025import org.apache.oro.text.regex.MalformedPatternException;
026import org.apache.oro.text.regex.MatchResult;
027import org.apache.oro.text.regex.Pattern;
028import org.apache.oro.text.regex.PatternCompiler;
029import org.apache.oro.text.regex.PatternMatcher;
030import org.apache.oro.text.regex.Perl5Compiler;
031import org.apache.oro.text.regex.Perl5Matcher;
032import org.apache.wiki.InternalWikiException;
033import org.apache.wiki.StringTransmutator;
034import org.apache.wiki.api.core.Acl;
035import org.apache.wiki.api.core.Context;
036import org.apache.wiki.api.core.ContextEnum;
037import org.apache.wiki.api.core.Page;
038import org.apache.wiki.api.exceptions.PluginException;
039import org.apache.wiki.api.plugin.Plugin;
040import org.apache.wiki.api.spi.Wiki;
041import org.apache.wiki.attachment.AttachmentManager;
042import org.apache.wiki.auth.AuthorizationManager;
043import org.apache.wiki.auth.UserManager;
044import org.apache.wiki.auth.WikiSecurityException;
045import org.apache.wiki.auth.acl.AclManager;
046import org.apache.wiki.i18n.InternationalizationManager;
047import org.apache.wiki.preferences.Preferences;
048import org.apache.wiki.util.TextUtil;
049import org.apache.wiki.util.XmlUtil;
050import org.apache.wiki.variables.VariableManager;
051import org.jdom2.Attribute;
052import org.jdom2.Content;
053import org.jdom2.Element;
054import org.jdom2.IllegalDataException;
055import org.jdom2.ProcessingInstruction;
056import org.jdom2.Verifier;
057
058import javax.xml.transform.Result;
059import java.io.IOException;
060import java.io.Reader;
061import java.io.StringReader;
062import java.text.MessageFormat;
063import java.util.ArrayList;
064import java.util.Arrays;
065import java.util.Collection;
066import java.util.EmptyStackException;
067import java.util.HashMap;
068import java.util.Iterator;
069import java.util.List;
070import java.util.Map;
071import java.util.Properties;
072import java.util.ResourceBundle;
073import java.util.Stack;
074
075/**
076 * Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the heart and soul of JSPWiki : make
077 * sure you test properly anything that is added, or else it breaks down horribly.
078 *
079 *  @since  2.4
080 */
081public class JSPWikiMarkupParser extends MarkupParser {
082
    // Link types understood by makeLink().  The value is also used as an index into CLASS_TYPES by createAnchor(),
    // so the numbering must stay in step with that array.

    /** Link to a wiki page, rendered as an anchor pointing at the page-view URL. */
    protected static final int              READ          = 0;
    /** Link rendered as an anchor pointing at the page-edit URL, with a localized "create" title. */
    protected static final int              EDIT          = 1;
    /** Used when the link target is the empty string; only the (underlined) link text is rendered. */
    protected static final int              EMPTY         = 2;  // Empty message
    /** Footnote target: a named anchor ("ref-&lt;context name&gt;-&lt;link without its first character&gt;"). */
    protected static final int              LOCAL         = 3;
    /** Reference to a footnote: an anchor whose href is "#ref-&lt;context name&gt;-&lt;link&gt;". */
    protected static final int              LOCALREF      = 4;
    /** Inline image; the link is used as the image source and the text as its alt text. */
    protected static final int              IMAGE         = 5;
    /** Plain anchor to the given link; gets rel="nofollow" when that option is enabled. */
    protected static final int              EXTERNAL      = 6;
    /** Plain anchor to the given (already resolved) link. */
    protected static final int              INTERWIKI     = 7;
    /** Inline image wrapped in an anchor whose href is the link text. */
    protected static final int              IMAGELINK     = 8;
    /** Inline image wrapped in an anchor pointing at the page-view URL of the link text. */
    protected static final int              IMAGEWIKILINK = 9;
    /** Anchor pointing at the attachment URL, optionally followed by a small "info" icon link. */
    protected static final int              ATTACHMENT    = 10;

    private static final Logger LOG = LogManager.getLogger( JSPWikiMarkupParser.class );
096
    // Markup state flags.  NOTE(review): apart from m_isEscaping (checked in flushPlainText() to suppress CamelCase
    // detection) these are not used by the methods visible in this part of the file; presumably they track which
    // markup constructs are currently open - confirm against the main parsing loop.
    private boolean        m_isbold;
    private boolean        m_isitalic;
    private boolean        m_istable;
    private boolean        m_isPre;
    private boolean        m_isEscaping;
    private boolean        m_isdefinition;
    private boolean        m_isPreBlock;

    /** Contains style information, in multiple forms. */
    private final Stack< Boolean > m_styleStack = new Stack<>();

    // general list handling
    private int m_genlistlevel;
    private final StringBuilder m_genlistBulletBuffer = new StringBuilder( 10 );  // stores the # and * pattern
    private final boolean m_allowPHPWikiStyleLists = true;

    private boolean m_isOpenParagraph;

    /** Parser for extended link functionality. */
    private final LinkParser m_linkParser = new LinkParser();

    /** Keeps track of any plain text that gets put in the Text nodes; emptied into the DOM by flushPlainText(). */
    private StringBuilder m_plainTextBuf = new StringBuilder( 20 );

    /** The DOM element that new content is appended to; moved by pushElement() and popElement(). */
    private Element m_currentElement;

    /** Keep track of duplicate header names: maps an encoded heading title to the number of times it has been seen. */
    private final Map< String, Integer > m_titleSectionCounter = new HashMap<>();

    /** If true, then considers CamelCase links as well.  Set in initialize() from the page attribute or the wiki property. */
    private boolean m_camelCaseLinks;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean m_wysiwygEditorMode;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean m_plainUris;

    /** If true, all outward links use a small link image. */
    private boolean m_useOutlinkImage = true;

    /** If true, an attachment link is followed by a small "info" icon link (see the ATTACHMENT case of makeLink()). */
    private boolean m_useAttachmentImage = true;

    /** If true, allows raw HTML: flushPlainText() then adds the buffered text without escaping it. */
    private boolean m_allowHTML;

    /** If true, anchors created for EXTERNAL links get a rel="nofollow" attribute. */
    private boolean m_useRelNofollow;

    /** Compiler used by initialize() to compile {@link #WIKIWORD_REGEX} when no cached pattern is available. */
    private final PatternCompiler m_compiler = new Perl5Compiler();

    // Capture groups, as consumed by flushPlainText():
    //   1 = the non-alphanumeric prefix before the match (or the empty match at start of text)
    //   2 = the whole CamelCase word, or the whole URI
    //   3 = the URI protocol (http://, https:// or mailto:), null for a CamelCase word
    //   4 = the rest of the URI after the protocol
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    private final PatternMatcher m_camelCaseMatcher = new Perl5Matcher();
    /** Compiled form of {@link #WIKIWORD_REGEX}; obtained from, or stored into, the engine attributes by initialize(). */
    private Pattern m_camelCasePattern;

    // NOTE(review): not used in the visible part of the file; presumably the current table row number - confirm.
    private int m_rowNum = 1;

    // NOTE(review): not used in the visible part of the file; presumably the most recently created heading - confirm.
    private Heading m_lastHeading;

    /** Name of the engine attribute under which the compiled CamelCase pattern is cached. */
    private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";
158
159    /**
160     *  Creates a markup parser.
161     *
162     *  @param context The WikiContext which controls the parsing
163     *  @param in Where the data is read from.
164     */
165    public JSPWikiMarkupParser( final Context context, final Reader in ) {
166        super( context, in );
167        initialize();
168    }
169
170    // FIXME: parsers should be pooled for better performance.
171    private void initialize() {
172        initInlineImagePatterns();
173
174        m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN );
175        if( m_camelCasePattern == null ) {
176            try {
177                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
178            } catch( final MalformedPatternException e ) {
179                LOG.fatal("Internal error: Someone put in a faulty pattern.",e);
180                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
181            }
182            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
183        }
184
185        //  Set the properties.
186        final Properties props = m_engine.getWikiProperties();
187        final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
188
189        if( cclinks != null ) {
190            m_camelCaseLinks = TextUtil.isPositive( cclinks );
191        } else {
192            m_camelCaseLinks  = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks );
193        }
194
195        final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE );
196        if( wysiwygVariable != null ) {
197            m_wysiwygEditorMode = wysiwygVariable;
198        }
199
200        m_plainUris          = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
201        m_useOutlinkImage    = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
202        m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
203        m_allowHTML          = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
204        m_useRelNofollow     = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
205
206        if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) {
207            disableAccessRules();
208        }
209
210        m_context.getPage().setHasMetadata();
211    }
212
213    /**
214     *  Calls a transmutator chain.
215     *
216     *  @param list Chain to call
217     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
218     *  @return The result of the mutation.
219     */
220    protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) {
221        if( list == null || list.size() == 0 ) {
222            return text;
223        }
224
225        for( final StringTransmutator m : list ) {
226            text = m.mutate( m_context, text );
227        }
228
229        return text;
230    }
231
232    /**
233     * Calls the heading listeners.
234     *
235     * @param param A Heading object.
236     */
237    private void callHeadingListenerChain( final Heading param ) {
238        for( final HeadingListener h : m_headingListenerChain ) {
239            h.headingAdded( m_context, param );
240        }
241    }
242
243    /**
244     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
245     *  if you really know what you are doing.
246     *
247     *  @param type One of the types above
248     *  @param link URL to which to link to
249     *  @param text Link text
250     *  @param section If a particular section identifier is required.
251     *  @return An A element.
252     *  @since 2.4.78
253     */
254    private Element createAnchor( final int type, final String link, String text, String section ) {
255        text = escapeHTMLEntities( text );
256        section = escapeHTMLEntities( section );
257        final Element el = new Element( "a" );
258        el.setAttribute( "class", CLASS_TYPES[ type ] );
259        el.setAttribute( "href", link + section );
260        el.addContent( text );
261        return el;
262    }
263
    /**
     *  Builds the DOM element(s) for a link of the given type and appends them to the current element.
     *
     *  @param type one of the link type constants of this class; forced to EMPTY when the link is the empty string
     *  @param link the link target; its interpretation depends on the type (page name, URL, image source, ...)
     *  @param text the link text; when {@code null} the link itself is used.  It is passed through the link mutator chain.
     *  @param section section name to append as "#section" (only used by READ, EXTERNAL and INTERWIKI), or {@code null}
     *  @param attributes extra attributes to set on the resulting element, or {@code null}
     *  @return the element that was appended last, or {@code null} when nothing was created for the given type.
     *          For ATTACHMENT this is the "info" icon link (or {@code null} when that icon is disabled), not the
     *          attachment anchor itself.
     */
    private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) {
        Element el = null;
        if( text == null ) {
            text = link;
        }
        text = callMutatorChain( m_linkMutators, text );
        section = (section != null) ? ("#"+section) : "";

        // Make sure we make a link name that can be accepted  as a valid URL.
        if( link.isEmpty() ) {
            type = EMPTY;
        }
        // Needed for the localized title of EDIT links.
        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );

        switch( type ) {
            case READ:
                el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section );
                break;

            case EDIT:
                // The section is deliberately dropped: the link points at the edit URL of the page.
                el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" );
                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
                break;

            case EMPTY:
                // No target at all: just show the text, underlined.
                el = new Element("u").addContent(text);
                break;

            // These two are for local references - footnotes and references to footnotes.
            // We embed the page name (or whatever WikiContext gives us) to make sure the links are unique across Wiki.
            case LOCALREF:
                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
                break;

            case LOCAL:
                el = new Element( "a" ).setAttribute( "class", CLASS_FOOTNOTE );
                // The first character of the link is skipped when building the anchor name.
                el.setAttribute( "name", "ref-" + m_context.getName() + "-" + link.substring( 1 ) );
                el.addContent( "[" + text + "]" );
                break;

                //  With the image, external and interwiki types we need to make sure nobody can put in Javascript or
                //  something else annoying into the links themselves.  We do this by preventing a haxor from stopping
                //  the link name short with quotes in fillBuffer().
            case IMAGE:
                el = new Element( "img" ).setAttribute( "class", "inline" );
                el.setAttribute( "src", link );
                el.setAttribute( "alt", text );
                break;

            case IMAGELINK:
                // Here the link is the image source and the text is the URL the image links to.
                el = new Element( "img" ).setAttribute( "class", "inline" );
                el.setAttribute( "src", link );
                el.setAttribute( "alt", text );
                el = createAnchor( IMAGELINK, text, "", "" ).addContent( el );
                break;

            case IMAGEWIKILINK:
                // Here the link is the image source and the text is the wiki page the image links to.
                final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text );
                el = new Element( "img" ).setAttribute( "class", "inline" );
                el.setAttribute( "src", link );
                el.setAttribute( "alt", text );
                el = createAnchor( IMAGEWIKILINK, pagelink, "", "" ).addContent( el );
                break;

            case EXTERNAL:
                el = createAnchor( EXTERNAL, link, text, section );
                if( m_useRelNofollow ) {
                    el.setAttribute( "rel", "nofollow" );
                }
                break;

            case INTERWIKI:
                el = createAnchor( INTERWIKI, link, text, section );
                break;

            case ATTACHMENT:
                final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link );
                final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link );
                final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" );
                el = createAnchor( ATTACHMENT, attlink, text, "" );
                if(  m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) {
                    el.setAttribute("download", "");
                }

                // The attachment anchor is appended to the document right here (push, then immediately pop back
                // to the parent); "el" is then reused for the optional info icon link.
                pushElement( el );
                popElement( el.getName() );

                if( m_useAttachmentImage ) {
                    el = new Element( "img" ).setAttribute( "src", imglink );
                    el.setAttribute( "border", "0" );
                    el.setAttribute( "alt", "(info)" );

                    el = new Element( "a" ).setAttribute( "href", infolink ).addContent( el );
                    el.setAttribute( "class", "infolink" );
                } else {
                    el = null;
                }
                break;

            default:
                break;
        }

        // NOTE(review): for ATTACHMENT the extra attributes end up on the info icon link (or are dropped when the
        // icon is disabled), not on the attachment anchor - confirm that this is intended.
        if( el != null && attributes != null ) {
            while( attributes.hasNext() ) {
                final Attribute attr = attributes.next();
                if( attr != null ) {
                    el.setAttribute( attr );
                }
            }
        }

        if( el != null ) {
            // Pending plain text must go into the DOM before the link to keep the document order.
            flushPlainText();
            m_currentElement.addContent( el );
        }
        return el;
    }
382
383    /**
384     *  These are all the HTML 4.01 block-level elements.
385     */
386    private static final String[] BLOCK_ELEMENTS = {
387        "address", "blockquote", "div", "dl", "fieldset", "form",
388        "h1", "h2", "h3", "h4", "h5", "h6",
389        "hr", "noscript", "ol", "p", "pre", "table", "ul"
390    };
391
392    private static boolean isBlockLevel( final String name ) {
393        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
394    }
395
396    /**
397     *  This method peeks ahead in the stream until EOL and returns the result. It will keep the buffers untouched.
398     *
399     *  @return The string from the current position to the end of line.
400     */
401    // FIXME: Always returns an empty line, even if the stream is full.
402    private String peekAheadLine() throws IOException {
403        final String s = readUntilEOL().toString();
404        if( s.length() > PUSHBACK_BUFFER_SIZE ) {
405            LOG.warn( "Line is longer than maximum allowed size (" + PUSHBACK_BUFFER_SIZE + " characters.  Attempting to recover..." );
406            pushBack( s.substring( 0, PUSHBACK_BUFFER_SIZE - 1 ) );
407        } else {
408            try {
409                pushBack( s );
410            } catch( final IOException e ) {
411                LOG.warn( "Pushback failed: the line is probably too long.  Attempting to recover." );
412            }
413        }
414        return s;
415    }
416
    /**
     *  Moves the buffered plain text into the current DOM element and empties the buffer.  Unless raw HTML
     *  is allowed the text is entity-escaped first.  When CamelCase links are enabled (and the parser is
     *  not escaping), CamelCase words and bare http/https/mailto URIs found in the text are turned into links.
     *
     *  @return the number of characters that were in the buffer before flushing (0 when there was nothing to do).
     */
    private int flushPlainText() {
        final int numChars = m_plainTextBuf.length();
        if( numChars > 0 ) {
            String buf;

            if( !m_allowHTML ) {
                buf = escapeHTMLEntities(m_plainTextBuf.toString());
            } else {
                buf = m_plainTextBuf.toString();
            }
            //  We must first empty the buffer because the side effect of calling makeCamelCaseLink() is to call this routine.
            m_plainTextBuf = new StringBuilder(20);
            try {
                // This is the heaviest part of parsing, and therefore we can do some optimization here.
                // 1) Only when the length of the buffer is big enough, we try to do the match
                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) {
                    // Each iteration consumes the text up to and including one match; "buf" is the unprocessed rest.
                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) {
                        final MatchResult result = m_camelCaseMatcher.getMatch();
                        final String firstPart = buf.substring( 0, result.beginOffset( 0 ) );
                        String prefix = result.group( 1 );
                        if( prefix == null ) {
                            prefix = "";
                        }

                        // See WIKIWORD_REGEX: group 2 is the matched word or URI, 3 the protocol, 4 the rest of the URI.
                        final String camelCase = result.group(2);
                        final String protocol  = result.group(3);
                        // Only meaningful when protocol != null; it is not used otherwise.
                        String uri       = protocol+result.group(4);
                        buf              = buf.substring(result.endOffset(0));

                        m_currentElement.addContent( firstPart );
                        //  Check if the user does not wish to do URL or WikiWord expansion
                        if( prefix.endsWith( "~" ) || prefix.indexOf( '[' ) != -1 ) {
                            if( prefix.endsWith( "~" ) ) {
                                // The escaping tilde itself is only kept in WYSIWYG editor mode.
                                if( m_wysiwygEditorMode ) {
                                    m_currentElement.addContent( "~" );
                                }
                                prefix = prefix.substring( 0, prefix.length() - 1 );
                            }
                            if( camelCase != null ) {
                                m_currentElement.addContent( prefix + camelCase );
                            } else if( protocol != null ) {
                                m_currentElement.addContent( prefix + uri );
                            }
                            continue;
                        }

                        // Fine, then let's check what kind of link this was and emit the proper elements
                        if( protocol != null ) {
                            // A trailing '.' or ',' is taken to be punctuation: it is removed from the URI and put
                            // back in front of the remaining text.
                            final char c = uri.charAt( uri.length() - 1 );
                            if( c == '.' || c == ',' ) {
                                uri = uri.substring( 0, uri.length() - 1 );
                                buf = c + buf;
                            }
                            m_currentElement.addContent( prefix );
                            makeDirectURILink( uri );
                        } else {
                            m_currentElement.addContent( prefix );
                            makeCamelCaseLink( camelCase );
                        }
                    }
                    // Whatever is left after the last match is plain text.
                    m_currentElement.addContent( buf );
                } else {
                    //  No camelcase asked for, just add the elements
                    m_currentElement.addContent( buf );
                }
            } catch( final IllegalDataException e ) {
                // Sometimes it's possible that illegal XML chars is added to the data. Here we make sure it does not stop parsing.
                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
            }
        }

        return numChars;
    }
494
495    /**
496     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape entities that are already escaped).
497     *
498     *  @param buf
499     *  @return An escaped string.
500     */
501    private String escapeHTMLEntities( final String buf ) {
502        final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );
503        for( int i = 0; i < buf.length(); i++ ) {
504            final char ch = buf.charAt(i);
505            if( ch == '<' ) {
506                tmpBuf.append("&lt;");
507            } else if( ch == '>' ) {
508                tmpBuf.append("&gt;");
509            } else if( ch == '\"' ) {
510                tmpBuf.append("&quot;");
511            } else if( ch == '&' ) {
512                // If the following is an XML entity reference (&#.*;) we'll leave it as it is; otherwise we'll replace it with an &amp;
513                boolean isEntity = false;
514                final StringBuilder entityBuf = new StringBuilder();
515                if( i < buf.length() -1 ) {
516                    for( int j = i; j < buf.length(); j++ ) {
517                        final char ch2 = buf.charAt( j );
518                        if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) {
519                            entityBuf.append(ch2);
520                            if( ch2 == ';' ) {
521                                isEntity = true;
522                                break;
523                            }
524                        } else {
525                            break;
526                        }
527                    }
528                }
529
530                if( isEntity ) {
531                    tmpBuf.append( entityBuf );
532                    i = i + entityBuf.length() - 1;
533                } else {
534                    tmpBuf.append( "&amp;" );
535                }
536
537            } else {
538                tmpBuf.append( ch );
539            }
540        }
541
542        return tmpBuf.toString();
543    }
544
545    private Element pushElement( final Element e ) {
546        flushPlainText();
547        m_currentElement.addContent( e );
548        m_currentElement = e;
549
550        return e;
551    }
552
553    private Element addElement( final Content e ) {
554        if( e != null ) {
555            flushPlainText();
556            m_currentElement.addContent( e );
557        }
558        return m_currentElement;
559    }
560
    /**
     *  All elements that can be empty by the HTML DTD.  popElement() leaves these alone when they are
     *  closed without content; any other element gets an empty text node instead.
     */
    //  Keep sorted: popElement() looks names up with a binary search.
    private static final String[] EMPTY_ELEMENTS = {
        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
    };
568
569    /**
570     *  Goes through the current element stack and pops all elements until this
571     *  element is found - this essentially "closes" and element.
572     *
573     *  @param s element to be found.
574     *  @return The new current element, or null, if there was no such element in the entire stack.
575     */
576    private Element popElement( final String s ) {
577        final int flushedBytes = flushPlainText();
578        Element currEl = m_currentElement;
579        while( currEl.getParentElement() != null ) {
580            if( currEl.getName().equals( s ) && !currEl.isRootElement() ) {
581                m_currentElement = currEl.getParentElement();
582
583                //  Check if it's okay for this element to be empty.  Then we will
584                //  trick the JDOM generator into not generating an empty element,
585                //  by putting an empty string between the tags.  Yes, it's a kludge
586                //  but what'cha gonna do about it. :-)
587                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) {
588                    currEl.addContent( "" );
589                }
590                return m_currentElement;
591            }
592            currEl = currEl.getParentElement();
593        }
594        return null;
595    }
596
597
598    /**
599     * Reads the stream until it meets one of the specified ending characters, or stream end. The ending
600     * character will be left in the stream.
601     */
602    private String readUntil( final String endChars ) throws IOException {
603        final StringBuilder sb = new StringBuilder( 80 );
604        int ch = nextToken();
605        while( ch != -1 ) {
606            if( ch == '\\' ) {
607                ch = nextToken();
608                if( ch == -1 ) {
609                    break;
610                }
611            } else {
612                if( endChars.indexOf( ( char )ch ) != -1 ) {
613                    pushBack( ch );
614                    break;
615                }
616            }
617            sb.append( ( char )ch );
618            ch = nextToken();
619        }
620
621        return sb.toString();
622    }
623
624    /**
625     *  Reads the stream while the characters that have been specified are
626     *  in the stream, returning then the result as a String.
627     */
628    private String readWhile( final String endChars ) throws IOException {
629        final StringBuilder sb = new StringBuilder( 80 );
630        int ch = nextToken();
631        while( ch != -1 ) {
632            if( endChars.indexOf( ( char ) ch ) == -1 ) {
633                pushBack( ch );
634                break;
635            }
636            sb.append( ( char ) ch );
637            ch = nextToken();
638        }
639
640        return sb.toString();
641    }
642
643    private JSPWikiMarkupParser m_cleanTranslator;
644
645    /**
646     *  Does a lazy init.  Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before
647     *  the TranslatorReader it is contained by is up.
648     */
649    private JSPWikiMarkupParser getCleanTranslator() {
650        if( m_cleanTranslator == null ) {
651            final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() );
652            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
653            m_cleanTranslator.m_allowHTML = true;
654        }
655
656        return m_cleanTranslator;
657    }
658
659    /**
660     *  Modifies the "hd" parameter to contain proper values.  Because
661     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
662     *  % after url encoding with '_'.
663     *  <p>
664     *  Counts also duplicate headings (= headings with similar name), and
665     *  attaches a counter.
666     */
667    protected String makeHeadingAnchor( final String baseName, String title, final Heading hd ) {
668        hd.m_titleText = title;
669        title = MarkupParser.wikifyLink( title );
670        hd.m_titleSection = m_engine.encodeName(title);
671        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) {
672            final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1;
673            m_titleSectionCounter.put( hd.m_titleSection, count );
674            hd.m_titleSection += "-" + count;
675        } else {
676            m_titleSectionCounter.put( hd.m_titleSection, 1 );
677        }
678
679        hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection;
680        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
681        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
682
683        return hd.m_titleAnchor;
684    }
685
686    private String makeSectionTitle( String title ) {
687        title = title.trim();
688        try {
689            final JSPWikiMarkupParser dtr = getCleanTranslator();
690            dtr.setInputReader( new StringReader( title ) );
691            final WikiDocument doc = dtr.parse();
692            doc.setContext( m_context );
693
694            return XmlUtil.extractTextFromDocument( doc );
695        } catch( final IOException e ) {
696            LOG.fatal("Title parsing not working", e );
697            throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e );
698        }
699    }
700
701    /**
702     *  Returns XHTML for the heading.
703     *
704     *  @param level The level of the heading.  @see Heading
705     *  @param title the title for the heading
706     *  @param hd a List to which heading should be added
707     *  @return An Element containing the heading
708     */
709    public Element makeHeading( final int level, final String title, final Heading hd ) {
710        final Element el;
711        final String pageName = m_context.getPage().getName();
712        final String outTitle = makeSectionTitle( title );
713        hd.m_level = level;
714
715        switch( level ) {
716          case Heading.HEADING_SMALL:
717            el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
718            break;
719
720          case Heading.HEADING_MEDIUM:
721            el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
722            break;
723
724          case Heading.HEADING_LARGE:
725            el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
726            break;
727
728          default:
729            throw new InternalWikiException( "Illegal heading type " + level );
730        }
731
732        return el;
733    }
734
735    /**
736     *  When given a link to a WikiName, we just return a proper HTML link for it.  The local link mutator
737     *  chain is also called.
738     */
739    private Element makeCamelCaseLink( final String wikiname ) {
740        final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
741        callMutatorChain( m_localLinkMutatorChain, wikiname );
742        if( matchedLink != null ) {
743            makeLink( READ, matchedLink, wikiname, null, null );
744        } else {
745            makeLink( EDIT, wikiname, wikiname, null, null );
746        }
747
748        return m_currentElement;
749    }
750
751    /** Holds the image URL for the duration of this parser */
752    private String m_outlinkImageURL;
753
754    /**
755     * Returns an element for the external link image (out.png).  However, this method caches the URL for the lifetime
756     * of this MarkupParser, because it's commonly used, and we'll end up with possibly hundreds our thousands of
757     * references to it...  It's a lot faster, too.
758     *
759     * @return  An element containing the HTML for the outlink image.
760     */
761    private Element outlinkImage() {
762        Element el = null;
763        if( m_useOutlinkImage ) {
764            if( m_outlinkImageURL == null ) {
765                m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE );
766            }
767
768            el = new Element( "img" ).setAttribute( "class", OUTLINK );
769            el.setAttribute( "src", m_outlinkImageURL );
770            el.setAttribute( "alt","" );
771        }
772
773        return el;
774    }
775
776    /**
777     *  Takes a URL and turns it into a regular wiki link. Unfortunately, because of the way that flushPlainText()
778     *  works, it already encodes all the XML entities. But so does WikiContext.getURL(), so we
779     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
780     *  <p>
781     *  What a crappy problem.
782     *
783     * @param url provided url.
784     * @return An anchor Element containing the link.
785     */
786    private Element makeDirectURILink( String url ) {
787        final Element result;
788        String last = null;
789
790        if( url.endsWith( "," ) || url.endsWith( "." ) ) {
791            last = url.substring( url.length() - 1 );
792            url = url.substring( 0, url.length() - 1 );
793        }
794
795        callMutatorChain( m_externalLinkMutatorChain, url );
796
797        if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) {
798            result = handleImageLink( StringUtils.replace( url, "&amp;", "&" ), url, false );
799        } else {
800            result = makeLink( EXTERNAL, StringUtils.replace( url, "&amp;", "&" ), url, null, null );
801            addElement( outlinkImage() );
802        }
803
804        if( last != null ) {
805            m_plainTextBuf.append( last );
806        }
807
808        return result;
809    }
810
811    /**
812     *  Image links are handled differently:
813     *  1. If the text is a WikiName of an existing page, it gets linked.
814     *  2. If the text is an external link, then it is inlined.
815     *  3. Otherwise, it becomes an ALT text.
816     *
817     *  @param reallink The link to the image.
818     *  @param link     Link text portion, may be a link to somewhere else.
819     *  @param hasLinkText If true, then the defined link had a link text available.
820     *                  This means that the link text may be a link to a wiki page,
821     *                  or an external resource.
822     */
823    private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) {
824        final String possiblePage = MarkupParser.cleanLink( link );
825        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) {
826            return makeLink( IMAGELINK, reallink, link, null, null );
827        } else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) {
828            callMutatorChain( m_localLinkMutatorChain, possiblePage );
829            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
830        } else {
831            return makeLink( IMAGE, reallink, link, null, null );
832        }
833    }
834
835    private Element handleAccessRule( String ruleLine ) {
836        if( m_wysiwygEditorMode ) {
837            m_currentElement.addContent( "[" + ruleLine + "]" );
838        }
839        if( !m_parseAccessRules ) {
840            return m_currentElement;
841        }
842        final Page page = m_context.getRealPage();
843        // UserDatabase db = m_context.getEngine().getUserDatabase();
844
845        if( ruleLine.startsWith( "{" ) ) {
846            ruleLine = ruleLine.substring( 1 );
847        }
848
849        if( ruleLine.endsWith( "}" ) ) {
850            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
851        }
852
853        LOG.debug("page={}, ACL = {}", page.getName(), ruleLine);
854
855        try {
856            final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine );
857            page.setAcl( acl );
858            LOG.debug( acl.toString() );
859        } catch( final WikiSecurityException wse ) {
860            return makeError( wse.getMessage() );
861        }
862
863        return m_currentElement;
864    }
865
866    /**
867     *  Handles metadata setting [{SET foo=bar}]
868     */
869    private Element handleMetadata( final String link ) {
870        if( m_wysiwygEditorMode ) {
871            m_currentElement.addContent( "[" + link + "]" );
872        }
873
874        try {
875            final String args = link.substring( link.indexOf(' '), link.length()-1 );
876            final String name = args.substring( 0, args.indexOf('=') ).trim();
877            String val  = args.substring( args.indexOf('=')+1 ).trim();
878
879            if( val.startsWith("'") ) {
880                val = val.substring( 1 );
881            }
882            if( val.endsWith("'") ) {
883                val = val.substring( 0, val.length()-1 );
884            }
885
886            // LOG.debug("SET name='"+name+"', value='"+val+"'.");
887
888            if( !name.isEmpty() && !val.isEmpty() ) {
889                val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val );
890                m_context.getPage().setAttribute( name, val );
891            }
892        } catch( final Exception e ) {
893            final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
894            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
895        }
896
897        return m_currentElement;
898    }
899
900    /**
901     *  Emits a processing instruction that will disable markup escaping. This is
902     *  very useful if you want to emit HTML directly into the stream.
903     */
904    private void disableOutputEscaping() {
905        addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) );
906    }
907
908    /**
909     *  Gobbles up all hyperlinks that are encased in square brackets.
910     */
911    private Element handleHyperlinks( String linktext, final int pos ) {
912        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
913        final StringBuilder sb = new StringBuilder( linktext.length() + 80 );
914
915        if( m_linkParsingOperations.isAccessRule( linktext ) ) {
916            return handleAccessRule( linktext );
917        }
918
919        if( m_linkParsingOperations.isMetadata( linktext ) ) {
920            return handleMetadata( linktext );
921        }
922
923        if( m_linkParsingOperations.isPluginLink( linktext ) ) {
924            try {
925                final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
926
927                // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not.
928                if( pluginContent != null ) {
929                    addElement( pluginContent );
930                    pluginContent.executeParse( m_context );
931                }
932            } catch( final PluginException e ) {
933                LOG.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
934                //LOG.info( "Root cause:",e.getRootThrowable() );
935                if( !m_wysiwygEditorMode ) {
936                    final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE );
937                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
938                                                                        m_context.getRealPage().getWiki(),
939                                                                        m_context.getRealPage().getName(),
940                                                                        e.getMessage() ) ) );
941                }
942            }
943            return m_currentElement;
944        }
945
946        try {
947            final LinkParser.Link link = m_linkParser.parse( linktext );
948            linktext = link.getText();
949            String linkref = link.getReference();
950            //  Yes, we now have the components separated.
951            //  linktext = the text the link should have
952            //  linkref  = the url or page name.
953            //  In many cases these are the same.  [linktext|linkref].
954            if( m_linkParsingOperations.isVariableLink( linktext ) ) {
955                final Content el = new VariableContent( linktext );
956                addElement( el );
957            } else if( m_linkParsingOperations.isExternalLink( linkref ) ) {
958                // It's an external link, out of this Wiki
959                callMutatorChain( m_externalLinkMutatorChain, linkref );
960                if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
961                    handleImageLink( linkref, linktext, link.hasReference() );
962                } else {
963                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
964                    addElement( outlinkImage() );
965                }
966            } else if( link.isInterwikiLink() ) {
967                // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved.
968
969                // FIXME: There is an interesting issue here:  We probably should
970                //        URLEncode the wikiPage, but we can't since some of the
971                //        Wikis use slashes (/), which won't survive URLEncoding.
972                //        Besides, we don't know which character set the other Wiki
973                //        is using, so you'll have to write the entire name as it appears
974                //        in the URL.  Bugger.
975
976                final String extWiki = link.getExternalWiki();
977                final String wikiPage = link.getExternalWikiPage();
978                if( m_wysiwygEditorMode ) {
979                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
980                } else {
981                    String urlReference = m_engine.getInterWikiURL( extWiki );
982                    if( urlReference != null ) {
983                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
984                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
985
986                        if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) {
987                            handleImageLink( urlReference, linktext, link.hasReference() );
988                        } else {
989                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
990                        }
991                        if( m_linkParsingOperations.isExternalLink( urlReference ) ) {
992                            addElement( outlinkImage() );
993                        }
994                    } else {
995                        final Object[] args = { escapeHTMLEntities( extWiki ) };
996                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
997                    }
998                }
999            } else if( linkref.startsWith( "#" ) ) {
1000                // It defines a local footnote
1001                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
1002            } else if( TextUtil.isNumber( linkref ) ) {
1003                // It defines a reference to a local footnote
1004                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
1005            } else {
1006                final int hashMark;
1007
1008                // Internal wiki link, but is it an attachment link?
1009                String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref );
1010                if( attachment != null ) {
1011                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
1012                    if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
1013                        attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment );
1014                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
1015                    } else {
1016                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
1017                    }
1018                } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) {
1019                    // It's an internal Wiki link, but to a named section
1020                    final String namedSection = linkref.substring( hashMark + 1 );
1021                    linkref = linkref.substring( 0, hashMark );
1022                    linkref = MarkupParser.cleanLink( linkref );
1023                    callMutatorChain( m_localLinkMutatorChain, linkref );
1024                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1025                    if( matchedLink != null ) {
1026                        String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) );
1027                        sectref = sectref.replace( '%', '_' );
1028                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
1029                    } else {
1030                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1031                    }
1032                } else {
1033                    // It's an internal Wiki link
1034                    linkref = MarkupParser.cleanLink( linkref );
1035                    callMutatorChain( m_localLinkMutatorChain, linkref );
1036                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
1037                    if( matchedLink != null ) {
1038                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
1039                    } else {
1040                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
1041                    }
1042                }
1043            }
1044
1045        } catch( final ParseException e ) {
1046            LOG.info( "Parser failure: ", e );
1047            final Object[] args = { e.getMessage() };
1048            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1049        }
1050        return m_currentElement;
1051    }
1052
1053    /**
1054     *  Pushes back any string that has been read.  It will obviously be pushed back in a reverse order.
1055     *
1056     *  @since 2.1.77
1057     */
1058    private void pushBack( final String s ) throws IOException {
1059        for( int i = s.length()-1; i >= 0; i-- ) {
1060            pushBack( s.charAt(i) );
1061        }
1062    }
1063
1064    private Element handleBackslash() throws IOException {
1065        final int ch = nextToken();
1066        if( ch == '\\' ) {
1067            final int ch2 = nextToken();
1068            if( ch2 == '\\' ) {
1069                pushElement( new Element( "br" ).setAttribute( "clear", "all" ) );
1070                return popElement( "br" );
1071            }
1072            pushBack( ch2 );
1073            pushElement( new Element( "br" ) );
1074            return popElement( "br" );
1075        }
1076        pushBack( ch );
1077        return null;
1078    }
1079
1080    private Element handleUnderscore() throws IOException {
1081        final int ch = nextToken();
1082        Element el = null;
1083        if( ch == '_' ) {
1084            if( m_isbold ) {
1085                el = popElement( "b" );
1086            } else {
1087                el = pushElement( new Element( "b" ) );
1088            }
1089            m_isbold = !m_isbold;
1090        } else {
1091            pushBack( ch );
1092        }
1093
1094        return el;
1095    }
1096
1097
1098    /**
1099     *  For example: italics.
1100     */
1101    private Element handleApostrophe() throws IOException {
1102        final int ch = nextToken();
1103        Element el = null;
1104
1105        if( ch == '\'' ) {
1106            if( m_isitalic ) {
1107                el = popElement( "i" );
1108            } else {
1109                el = pushElement( new Element( "i" ) );
1110            }
1111            m_isitalic = !m_isitalic;
1112        } else {
1113            pushBack( ch );
1114        }
1115
1116        return el;
1117    }
1118
1119    private Element handleOpenbrace( final boolean isBlock ) throws IOException {
1120        final int ch = nextToken();
1121        if( ch == '{' ) {
1122            final int ch2 = nextToken();
1123            if( ch2 == '{' ) {
1124                m_isPre = true;
1125                m_isEscaping = true;
1126                m_isPreBlock = isBlock;
1127                if( isBlock ) {
1128                    startBlockLevel();
1129                    return pushElement( new Element( "pre" ) );
1130                }
1131
1132                return pushElement( new Element( "span" ).setAttribute( "class", "inline-code" ) );
1133            }
1134            pushBack( ch2 );
1135            return pushElement( new Element( "tt" ) );
1136        }
1137        pushBack( ch );
1138        return null;
1139    }
1140
1141    /**
1142     *  Handles both }} and }}}
1143     */
1144    private Element handleClosebrace() throws IOException {
1145        final int ch2 = nextToken();
1146        if( ch2 == '}' ) {
1147            final int ch3 = nextToken();
1148            if( ch3 == '}' ) {
1149                if( m_isPre ) {
1150                    if( m_isPreBlock ) {
1151                        popElement( "pre" );
1152                    } else {
1153                        popElement( "span" );
1154                    }
1155                    m_isPre = false;
1156                    m_isEscaping = false;
1157                    return m_currentElement;
1158                }
1159                m_plainTextBuf.append( "}}}" );
1160                return m_currentElement;
1161            }
1162            pushBack( ch3 );
1163            if( !m_isEscaping ) {
1164                return popElement( "tt" );
1165            }
1166        }
1167        pushBack( ch2 );
1168        return null;
1169    }
1170
1171    private Element handleDash() throws IOException {
1172        int ch = nextToken();
1173        if( ch == '-' ) {
1174            final int ch2 = nextToken();
1175            if( ch2 == '-' ) {
1176                final int ch3 = nextToken();
1177                if( ch3 == '-' ) {
1178                    // Empty away all the rest of the dashes.
1179                    // Do not forget to return the first non-match back.
1180                    do {
1181                        ch = nextToken();
1182                    } while ( ch == '-' );
1183
1184                    pushBack( ch );
1185                    startBlockLevel();
1186                    pushElement( new Element( "hr" ) );
1187                    return popElement( "hr" );
1188                }
1189                pushBack( ch3 );
1190            }
1191            pushBack( ch2 );
1192        }
1193        pushBack( ch );
1194        return null;
1195    }
1196
1197    private Element handleHeading() throws IOException {
1198        final Element el;
1199        final int ch  = nextToken();
1200        final Heading hd = new Heading();
1201        if( ch == '!' ) {
1202            final int ch2 = nextToken();
1203            if( ch2 == '!' ) {
1204                final String title = peekAheadLine();
1205                el = makeHeading( Heading.HEADING_LARGE, title, hd );
1206            } else {
1207                pushBack( ch2 );
1208                final String title = peekAheadLine();
1209                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1210            }
1211        } else {
1212            pushBack( ch );
1213            final String title = peekAheadLine();
1214            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1215        }
1216
1217        callHeadingListenerChain( hd );
1218        m_lastHeading = hd;
1219        if( el != null ) {
1220            pushElement( el );
1221        }
1222        return el;
1223    }
1224
1225    /**
1226     * Reads the stream until the next EOL or EOF.  Note that it will also read the EOL from the stream.
1227     */
1228    private StringBuilder readUntilEOL() throws IOException {
1229        int ch;
1230        final StringBuilder buf = new StringBuilder( 256 );
1231        while( true ) {
1232            ch = nextToken();
1233            if( ch == -1 ) {
1234                break;
1235            }
1236            buf.append( (char) ch );
1237            if( ch == '\n' ) {
1238                break;
1239            }
1240        }
1241        return buf;
1242    }
1243
1244    /** Controls whether italic is restarted after a paragraph shift */
1245
1246    private boolean m_restartitalic;
1247    private boolean m_restartbold;
1248
1249    private boolean m_newLine;
1250
1251    /**
1252     * Starts a block level element, therefore closing a potential open paragraph tag.
1253     */
1254    private void startBlockLevel() {
1255        // These may not continue over block level limits in XHTML
1256        popElement( "i" );
1257        popElement( "b" );
1258        popElement( "tt" );
1259        if( m_isOpenParagraph ) {
1260            m_isOpenParagraph = false;
1261            popElement( "p" );
1262            m_plainTextBuf.append( "\n" ); // Just small beautification
1263        }
1264        m_restartitalic = m_isitalic;
1265        m_restartbold   = m_isbold;
1266        m_isitalic = false;
1267        m_isbold   = false;
1268    }
1269
1270    private static String getListType( final char c ) {
1271        if( c == '*' ) {
1272            return "ul";
1273        } else if( c == '#' ) {
1274            return "ol";
1275        }
1276        throw new InternalWikiException( "Parser got faulty list type: " + c );
1277    }
1278    /**
1279     * Like original handleOrderedList() and handleUnorderedList(),
1280     * however handles both ordered ('#') and unordered ('*') mixed together.
1281     */
1282    // FIXME: Refactor this; it's a bit messy.
1283    private Element handleGeneralList() throws IOException {
1284         startBlockLevel();
1285         String strBullets = readWhile( "*#" );
1286         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1287         final int numBullets = strBullets.length();
1288
1289         // override the beginning portion of bullet pattern to be like the previous to simulate PHPWiki style lists
1290
1291        if( m_allowPHPWikiStyleLists ) {
1292            // only substitute if different
1293            if( !( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) ) {
1294                if( numBullets <= m_genlistlevel ) {
1295                    // Substitute all but the last character (keep the expressed bullet preference)
1296                    strBullets = ( numBullets > 1 ? m_genlistBulletBuffer.substring( 0, numBullets - 1 ) : "" ) +
1297                                 strBullets.charAt( numBullets - 1 );
1298                } else {
1299                    strBullets = m_genlistBulletBuffer + strBullets.substring( m_genlistlevel, numBullets );
1300                }
1301            }
1302        }
1303
1304         //  Check if this is still of the same type
1305        if( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) {
1306            if( numBullets > m_genlistlevel ) {
1307                pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel++ ) ) ) );
1308                for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) {
1309                    // bullets are growing, get from new bullet list
1310                    pushElement( new Element( "li" ) );
1311                    pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel ) ) ) );
1312                }
1313            } else if( numBullets < m_genlistlevel ) {
1314                //  Close the previous list item.
1315                popElement( "li" );
1316                for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) {
1317                    // bullets are shrinking, get from old bullet list
1318                    popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1319                    if( m_genlistlevel > 0 ) {
1320                        popElement( "li" );
1321                    }
1322                }
1323            } else {
1324                if( m_genlistlevel > 0 ) {
1325                    popElement( "li" );
1326                }
1327            }
1328        } else {
1329            //  The pattern has changed, unwind and restart
1330            int numEqualBullets;
1331            final int numCheckBullets;
1332
1333            // find out how much is the same
1334            numEqualBullets = 0;
1335            numCheckBullets = Math.min( numBullets, m_genlistlevel );
1336
1337            while( numEqualBullets < numCheckBullets ) {
1338                // if the bullets are equal so far, keep going
1339                if( strBullets.charAt( numEqualBullets ) == m_genlistBulletBuffer.charAt( numEqualBullets ) )
1340                    numEqualBullets++;
1341                    // otherwise giveup, we have found how many are equal
1342                else
1343                    break;
1344            }
1345
1346            //unwind
1347            for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) {
1348                popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1349                if( m_genlistlevel > numBullets ) {
1350                    popElement( "li" );
1351                }
1352            }
1353
1354            //rewind
1355            pushElement( new Element( getListType( strBullets.charAt( numEqualBullets++ ) ) ) );
1356            for( int i = numEqualBullets; i < numBullets; i++ ) {
1357                pushElement( new Element( "li" ) );
1358                pushElement( new Element( getListType( strBullets.charAt( i ) ) ) );
1359            }
1360            m_genlistlevel = numBullets;
1361        }
1362
1363         // Push a new list item, and eat away any extra whitespace
1364        pushElement( new Element( "li" ) );
1365        readWhile( " " );
1366
1367        // work done, remember the new bullet list (in place of old one)
1368        m_genlistBulletBuffer.setLength( 0 );
1369        m_genlistBulletBuffer.append( strBullets );
1370        return m_currentElement;
1371    }
1372
1373    private Element unwindGeneralList() {
1374        // unwind
1375        for( ; m_genlistlevel > 0; m_genlistlevel-- ) {
1376            popElement( "li" );
1377            popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1378        }
1379        m_genlistBulletBuffer.setLength( 0 );
1380        return null;
1381    }
1382
1383
1384    private Element handleDefinitionList() {
1385        if( !m_isdefinition ) {
1386            m_isdefinition = true;
1387            startBlockLevel();
1388            pushElement( new Element( "dl" ) );
1389            return pushElement( new Element( "dt" ) );
1390        }
1391        return null;
1392    }
1393
1394    private Element handleOpenbracket() throws IOException {
1395        final StringBuilder sb = new StringBuilder( 40 );
1396        final int pos = getPosition();
1397        int ch = nextToken();
1398        boolean isPlugin = false;
1399        if( ch == '[' ) {
1400            if( m_wysiwygEditorMode ) {
1401                sb.append( '[' );
1402            }
1403            sb.append( ( char )ch );
1404            while( ( ch = nextToken() ) == '[' ) {
1405                sb.append( ( char )ch );
1406            }
1407        }
1408
1409        if( ch == '{' ) {
1410            isPlugin = true;
1411        }
1412
1413        pushBack( ch );
1414
1415        if( sb.length() > 0 ) {
1416            m_plainTextBuf.append( sb );
1417            return m_currentElement;
1418        }
1419
1420        //  Find end of hyperlink
1421        ch = nextToken();
1422        int nesting = 1; // Check for nested plugins
1423        while( ch != -1 ) {
1424            final int ch2 = nextToken();
1425            pushBack( ch2 );
1426            if( isPlugin ) {
1427                if( ch == '[' && ch2 == '{' ) {
1428                    nesting++;
1429                } else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) {
1430                    break;
1431                } else if( ch == '}' && ch2 == ']' ) {
1432                    // NB: This will be decremented once at the end
1433                    nesting--;
1434                }
1435            } else {
1436                if( ch == ']' ) {
1437                    break;
1438                }
1439            }
1440
1441            sb.append( (char) ch );
1442
1443            ch = nextToken();
1444        }
1445
1446        //  If the link is never finished, do some tricks to display the rest of the line unchanged.
1447        if( ch == -1 ) {
1448            LOG.debug( "Warning: unterminated link detected!" );
1449            m_isEscaping = true;
1450            m_plainTextBuf.append( sb );
1451            flushPlainText();
1452            m_isEscaping = false;
1453            return m_currentElement;
1454        }
1455
1456        return handleHyperlinks( sb.toString(), pos );
1457    }
1458
1459    /**
1460     *  Reads the stream until the current brace is closed or stream end.
1461     */
1462    private String readBraceContent( final char opening, final char closing ) throws IOException {
1463        final StringBuilder sb = new StringBuilder( 40 );
1464        int braceLevel = 1;
1465        int ch;
1466        while( ( ch = nextToken() ) != -1 ) {
1467            if( ch == '\\' ) {
1468                continue;
1469            } else if( ch == opening ) {
1470                braceLevel++;
1471            } else if( ch == closing ) {
1472                braceLevel--;
1473                if( braceLevel == 0 ) {
1474                    break;
1475                }
1476            }
1477            sb.append( ( char ) ch );
1478        }
1479        return sb.toString();
1480    }
1481
1482
1483    /**
1484     * Handles constructs of type %%(style) and %%class
1485     * @return An Element containing the div or span, depending on the situation.
1486     * @throws IOException
1487     */
1488    private Element handleDiv( ) throws IOException {
1489        int ch = nextToken();
1490        Element el = null;
1491
1492        if( ch == '%' ) {
1493            String style = null;
1494            String clazz = null;
1495
1496            ch = nextToken();
1497
1498            //  Style or class?
1499            if( ch == '(' ) {
1500                style = readBraceContent('(',')');
1501            } else if( Character.isLetter( (char) ch ) ) {
1502                pushBack( ch );
1503                clazz = readUntil( "( \t\n\r" );
1504                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
1505                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
1506                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
1507                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
1508
1509                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
1510                //(2) remove any invalid character
1511                if( clazz != null ) {
1512                    clazz = clazz.replace( '.', ' ' )
1513                                 .replaceAll( "[^\\s-_\\w\\x200-\\x377]+", "" );
1514                }
1515                ch = nextToken();
1516
1517                // check for %%class1.class2( style information )
1518                if( ch == '(' ) {
1519                    style = readBraceContent( '(', ')' );
1520                //  Pop out only spaces, so that the upcoming EOL check does not check the next line.
1521                } else if( ch == '\n' || ch == '\r' ) {
1522                    pushBack( ch );
1523                }
1524            } else {
1525                // Anything else stops.
1526                pushBack( ch );
1527                try {
1528                    final Boolean isSpan = m_styleStack.pop();
1529                    if( isSpan == null ) {
1530                        // Fail quietly
1531                    } else if( isSpan ) {
1532                        el = popElement( "span" );
1533                    } else {
1534                        el = popElement( "div" );
1535                    }
1536                } catch( final EmptyStackException e ) {
1537                    LOG.debug( "Page '" + m_context.getName() + "' closes a %%-block that has not been opened." );
1538                    return m_currentElement;
1539                }
1540                return el;
1541            }
1542
1543            //  Check if there is an attempt to do something nasty
1544            try {
1545                style = StringEscapeUtils.unescapeHtml4(style);
1546                if( style != null && style.contains( "javascript:" ) ) {
1547                    LOG.debug( "Attempt to output javascript within CSS: {}", style );
1548                    final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1549                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
1550                }
1551            } catch( final NumberFormatException e ) {
1552                //  If there are unknown entities, we don't want the parser to stop.
1553                final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1554                final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
1555                return addElement( makeError( msg ) );
1556            }
1557
1558            //  Decide if we should open a div or a span?
1559            final String eol = peekAheadLine();
1560
1561            if( !eol.trim().isEmpty() ) {
1562                // There is stuff after the class
1563                el = new Element("span");
1564                m_styleStack.push( Boolean.TRUE );
1565            } else {
1566                startBlockLevel();
1567                el = new Element("div");
1568                m_styleStack.push( Boolean.FALSE );
1569            }
1570
1571            if( style != null ) el.setAttribute("style", style);
1572            if( clazz != null ) el.setAttribute("class", clazz);
1573            return pushElement( el );
1574        }
1575        pushBack( ch );
1576        return el;
1577    }
1578
1579    private Element handleSlash( ) throws IOException {
1580        final int ch = nextToken();
1581        pushBack( ch );
1582        if( ch == '%' && !m_styleStack.isEmpty() ) {
1583            return handleDiv();
1584        }
1585
1586        return null;
1587    }
1588
1589    private Element handleBar( final boolean newLine ) throws IOException {
1590        Element el;
1591        if( !m_istable && !newLine ) {
1592            return null;
1593        }
1594
1595        //  If the bar is in the first column, we will either start a new table or continue the old one.
1596        if( newLine ) {
1597            if( !m_istable ) {
1598                startBlockLevel();
1599                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
1600                m_istable = true;
1601                m_rowNum = 0;
1602            }
1603
1604            m_rowNum++;
1605            final Element tr = ( m_rowNum % 2 != 0 )
1606                       ? new Element("tr").setAttribute("class", "odd")
1607                       : new Element("tr");
1608            el = pushElement( tr );
1609        }
1610
1611        //  Check out which table cell element to start; a header element (th) or a regular element (td).
1612        final int ch = nextToken();
1613        if( ch == '|' ) {
1614            if( !newLine ) {
1615                el = popElement("th");
1616                if( el == null ) popElement("td");
1617            }
1618            el = pushElement( new Element("th") );
1619        } else {
1620            if( !newLine ) {
1621                el = popElement( "td" );
1622                if( el == null ) popElement( "th" );
1623            }
1624            el = pushElement( new Element("td") );
1625            pushBack( ch );
1626        }
1627        return el;
1628    }
1629
1630    /**
1631     *  Generic escape of next character or entity.
1632     */
1633    private Element handleTilde() throws IOException {
1634        final int ch = nextToken();
1635
1636        if( ch == ' ' ) {
1637            if( m_wysiwygEditorMode ) {
1638                m_plainTextBuf.append( "~ " );
1639            }
1640            return m_currentElement;
1641        }
1642
1643        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
1644            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
1645            ch == '{' || ch == ']' || ch == '}' || ch == '%' ) {
1646            if( m_wysiwygEditorMode ) {
1647                m_plainTextBuf.append( '~' );
1648            }
1649            m_plainTextBuf.append( ( char ) ch );
1650            m_plainTextBuf.append( readWhile( "" + ( char ) ch ) );
1651            return m_currentElement;
1652        }
1653        // No escape.
1654        pushBack( ch );
1655        return null;
1656    }
1657
1658    private void fillBuffer( final Element startElement ) throws IOException {
1659        m_currentElement = startElement;
1660        m_newLine = true;
1661        boolean quitReading = false;
1662        disableOutputEscaping();
1663        while( !quitReading ) {
1664            final int ch = nextToken();
1665            if( ch == -1 ) {
1666                break;
1667            }
1668
1669            //  Check if we're actually ending the preformatted mode. We still must do an entity transformation here.
1670            if( m_isEscaping ) {
1671                if( ch == '}' ) {
1672                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
1673                } else if( ch == -1 ) {
1674                    quitReading = true;
1675                }
1676                else if( ch == '\r' ) {
1677                    // DOS line feeds we ignore.
1678                } else if( ch == '<' ) {
1679                    m_plainTextBuf.append( "&lt;" );
1680                } else if( ch == '>' ) {
1681                    m_plainTextBuf.append( "&gt;" );
1682                } else if( ch == '&' ) {
1683                    m_plainTextBuf.append( "&amp;" );
1684                } else if( ch == '~' ) {
1685                    String braces = readWhile( "}" );
1686                    if( braces.length() >= 3 ) {
1687                        m_plainTextBuf.append( "}}}" );
1688                        braces = braces.substring(3);
1689                    } else {
1690                        m_plainTextBuf.append( (char) ch );
1691                    }
1692
1693                    for( int i = braces.length()-1; i >= 0; i-- ) {
1694                        pushBack( braces.charAt( i ) );
1695                    }
1696                } else {
1697                    m_plainTextBuf.append( (char) ch );
1698                }
1699
1700                continue;
1701            }
1702
1703            //  An empty line stops a list
1704            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) {
1705                m_plainTextBuf.append(unwindGeneralList());
1706            }
1707
1708            if( m_newLine && ch != '|' && m_istable ) {
1709                popElement( "table" );
1710                m_istable = false;
1711            }
1712
1713            int skip = IGNORE;
1714            //  Do the actual parsing and catch any errors.
1715            try {
1716                skip = parseToken( ch );
1717            } catch( final IllegalDataException e ) {
1718                LOG.info( "Page {} contains data which cannot be added to DOM tree: {}", m_context.getPage().getName(), e.getMessage() );
1719                makeError( "Error: " + cleanupSuspectData( e.getMessage() ) );
1720            }
1721
1722            // The idea is as follows:  If the handler method returns an element (el != null), it is assumed that it
1723            // has been added in the stack.  Otherwise, the character is added as is to the plaintext buffer.
1724            //
1725            // For the transition phase, if s != null, it also gets added in the plaintext buffer.
1726            switch( skip ) {
1727                case ELEMENT:
1728                    m_newLine = false;
1729                    break;
1730
1731                case CHARACTER:
1732                    m_plainTextBuf.append( (char) ch );
1733                    m_newLine = false;
1734                    break;
1735
1736                case IGNORE:
1737                default:
1738                    break;
1739            }
1740        }
1741
1742        closeHeadings();
1743        popElement( "domroot" );
1744    }
1745
1746    private String cleanupSuspectData( final String s ) {
1747        final StringBuilder sb = new StringBuilder( s.length() );
1748        for( int i = 0; i < s.length(); i++ ) {
1749            final char c = s.charAt(i);
1750            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
1751            else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() );
1752        }
1753
1754        return sb.toString();
1755    }
1756
1757    /** The token is a plain character. */
1758    protected static final int CHARACTER = 0;
1759
1760    /** The token is a wikimarkup element. */
1761    protected static final int ELEMENT   = 1;
1762
1763    /** The token is to be ignored. */
1764    protected static final int IGNORE    = 2;
1765
1766    /**
1767     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
1768     *  you think this was a wiki markup element, and IGNORE, if you think
1769     *  we should ignore this altogether.
1770     *  <p>
1771     *  To add your own MarkupParser, you can override this method, but it
1772     *  is recommended that you call super.parseToken() as well to gain advantage
1773     *  of JSPWiki's own markup.  You can call it at the start of your own
1774     *  parseToken() or end - it does not matter.
1775     *
1776     * @param ch The character under investigation
1777     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
1778     * @throws IOException If parsing fails.
1779     */
1780    protected int parseToken( final int ch ) throws IOException {
1781        Element el = null;
1782        //  Now, check the incoming token.
1783        switch( ch ) {
1784          case '\r':
1785            // DOS linefeeds we forget
1786            return IGNORE;
1787
1788          case '\n':
1789            //  Close things like headings, etc.
1790            // FIXME: This is not really very fast
1791            closeHeadings();
1792
1793            popElement( "dl" ); // Close definition lists.
1794            if( m_istable ) {
1795                popElement("tr");
1796            }
1797            m_isdefinition = false;
1798            if( m_newLine ) {
1799                // Paragraph change.
1800                startBlockLevel();
1801                //  Figure out which elements cannot be enclosed inside a <p></p> pair according to XHTML rules.
1802                final String nextLine = peekAheadLine();
1803                if( nextLine.isEmpty() ||
1804                     ( !nextLine.isEmpty() &&
1805                       !nextLine.startsWith( "{{{" ) &&
1806                       !nextLine.startsWith( "----" ) &&
1807                       !nextLine.startsWith( "%%" ) &&
1808                       "*#!;".indexOf( nextLine.charAt( 0 ) ) == -1 ) ) {
1809                    pushElement( new Element( "p" ) );
1810                    m_isOpenParagraph = true;
1811
1812                    if( m_restartitalic ) {
1813                        pushElement( new Element( "i" ) );
1814                        m_isitalic = true;
1815                        m_restartitalic = false;
1816                    }
1817                    if( m_restartbold ) {
1818                        pushElement( new Element( "b" ) );
1819                        m_isbold = true;
1820                        m_restartbold = false;
1821                    }
1822                }
1823            } else {
1824                m_plainTextBuf.append("\n");
1825                m_newLine = true;
1826            }
1827            return IGNORE;
1828
1829          case '\\':
1830            el = handleBackslash();
1831            break;
1832
1833          case '_':
1834            el = handleUnderscore();
1835            break;
1836
1837          case '\'':
1838            el = handleApostrophe();
1839            break;
1840
1841          case '{':
1842            el = handleOpenbrace( m_newLine );
1843            break;
1844
1845          case '}':
1846            el = handleClosebrace();
1847            break;
1848
1849          case '-':
1850            if( m_newLine ) {
1851                el = handleDash();
1852            }
1853            break;
1854
1855          case '!':
1856            if( m_newLine ) {
1857                el = handleHeading();
1858            }
1859            break;
1860
1861          case ';':
1862            if( m_newLine ) {
1863                el = handleDefinitionList();
1864            }
1865            break;
1866
1867          case ':':
1868            if( m_isdefinition ) {
1869                popElement( "dt" );
1870                el = pushElement( new Element( "dd" ) );
1871                m_isdefinition = false;
1872            }
1873            break;
1874
1875          case '[':
1876            el = handleOpenbracket();
1877            break;
1878
1879          case '*':
1880            if( m_newLine ) {
1881                pushBack( '*' );
1882                el = handleGeneralList();
1883            }
1884            break;
1885
1886          case '#':
1887            if( m_newLine ) {
1888                pushBack( '#' );
1889                el = handleGeneralList();
1890            }
1891            break;
1892
1893          case '|':
1894            el = handleBar( m_newLine );
1895            break;
1896
1897          case '~':
1898            el = handleTilde();
1899            break;
1900
1901          case '%':
1902            el = handleDiv();
1903            break;
1904
1905          case '/':
1906            el = handleSlash();
1907            break;
1908
1909          default:
1910            break;
1911        }
1912
1913        return el != null ? ELEMENT : CHARACTER;
1914    }
1915
1916    private void closeHeadings() {
1917        if( m_lastHeading != null && !m_wysiwygEditorMode ) {
1918            // Add the hash anchor element at the end of the heading
1919            addElement( new Element("a").setAttribute( "class",HASHLINK )
1920                                              .setAttribute( "href","#" + m_lastHeading.m_titleAnchor )
1921                                              .setText( "#" ) );
1922            m_lastHeading = null;
1923        }
1924        popElement( "h2" );
1925        popElement( "h3" );
1926        popElement( "h4" );
1927    }
1928
1929    /**
1930     *  Parses the entire document from the Reader given in the constructor or set by {@link #setInputReader(Reader)}.
1931     *
1932     *  @return A WikiDocument, ready to be passed to the renderer.
1933     *  @throws IOException If parsing cannot be accomplished.
1934     */
1935    @Override
1936    public WikiDocument parse() throws IOException {
1937        final WikiDocument d = new WikiDocument( m_context.getPage() );
1938        d.setContext( m_context );
1939        final Element rootElement = new Element( "domroot" );
1940        d.setRootElement( rootElement );
1941        fillBuffer( rootElement );
1942        paragraphify( rootElement );
1943
1944        return d;
1945    }
1946
1947    /**
1948     *  Checks out that the first paragraph is correctly installed.
1949     *
1950     *  @param rootElement element to be checked.
1951     */
1952    private void paragraphify( final Element rootElement) {
1953        //  Add the paragraph tag to the first paragraph
1954        final List< Content > kids = rootElement.getContent();
1955        if( rootElement.getChild( "p" ) != null ) {
1956            final ArrayList<Content> ls = new ArrayList<>();
1957            int idxOfFirstContent = 0;
1958            int count = 0;
1959
1960            for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) {
1961                final Content c = i.next();
1962                if( c instanceof Element ) {
1963                    final String name = ( ( Element )c ).getName();
1964                    if( isBlockLevel( name ) ) {
1965                        break;
1966                    }
1967                }
1968
1969                if( !( c instanceof ProcessingInstruction ) ) {
1970                    ls.add( c );
1971                    if( idxOfFirstContent == 0 ) {
1972                        idxOfFirstContent = count;
1973                    }
1974                }
1975            }
1976
1977            //  If there were any elements, then add a new <p> (unless it would be an empty one)
1978            if( ls.size() > 0 ) {
1979                final Element newel = new Element("p");
1980                for( final Content c : ls ) {
1981                    c.detach();
1982                    newel.addContent( c );
1983                }
1984
1985                // Make sure there are no empty <p/> tags added.
1986                if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() ) {
1987                    rootElement.addContent( idxOfFirstContent, newel );
1988                }
1989            }
1990        }
1991    }
1992
1993}