001    /*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import org.apache.commons.lang3.StringUtils;
022import org.apache.commons.text.StringEscapeUtils;
023import org.apache.logging.log4j.LogManager;
024import org.apache.logging.log4j.Logger;
025import org.apache.oro.text.regex.MalformedPatternException;
026import org.apache.oro.text.regex.MatchResult;
027import org.apache.oro.text.regex.Pattern;
028import org.apache.oro.text.regex.PatternCompiler;
029import org.apache.oro.text.regex.PatternMatcher;
030import org.apache.oro.text.regex.Perl5Compiler;
031import org.apache.oro.text.regex.Perl5Matcher;
032import org.apache.wiki.InternalWikiException;
033import org.apache.wiki.StringTransmutator;
034import org.apache.wiki.api.core.Acl;
035import org.apache.wiki.api.core.Context;
036import org.apache.wiki.api.core.ContextEnum;
037import org.apache.wiki.api.core.Page;
038import org.apache.wiki.api.exceptions.PluginException;
039import org.apache.wiki.api.plugin.Plugin;
040import org.apache.wiki.api.spi.Wiki;
041import org.apache.wiki.attachment.AttachmentManager;
042import org.apache.wiki.auth.AuthorizationManager;
043import org.apache.wiki.auth.UserManager;
044import org.apache.wiki.auth.WikiSecurityException;
045import org.apache.wiki.auth.acl.AclManager;
046import org.apache.wiki.i18n.InternationalizationManager;
047import org.apache.wiki.preferences.Preferences;
048import org.apache.wiki.util.TextUtil;
049import org.apache.wiki.util.XmlUtil;
050import org.apache.wiki.variables.VariableManager;
051import org.jdom2.Attribute;
052import org.jdom2.Content;
053import org.jdom2.Element;
054import org.jdom2.IllegalDataException;
055import org.jdom2.ProcessingInstruction;
056import org.jdom2.Verifier;
057
058import javax.xml.transform.Result;
059import java.io.IOException;
060import java.io.Reader;
061import java.io.StringReader;
062import java.text.MessageFormat;
063import java.util.ArrayList;
064import java.util.Arrays;
065import java.util.Collection;
066import java.util.EmptyStackException;
067import java.util.HashMap;
068import java.util.Iterator;
069import java.util.List;
070import java.util.Map;
071import java.util.Properties;
072import java.util.ResourceBundle;
073import java.util.Stack;
074
075/**
076 * Parses JSPWiki-style markup into a WikiDocument DOM tree.  This class is the heart and soul of JSPWiki : make
077 * sure you test properly anything that is added, or else it breaks down horribly.
078 *
079 *  @since  2.4
080 */
081public class JSPWikiMarkupParser extends MarkupParser {
082
083    protected static final int              READ          = 0;
084    protected static final int              EDIT          = 1;
085    protected static final int              EMPTY         = 2;  // Empty message
086    protected static final int              LOCAL         = 3;
087    protected static final int              LOCALREF      = 4;
088    protected static final int              IMAGE         = 5;
089    protected static final int              EXTERNAL      = 6;
090    protected static final int              INTERWIKI     = 7;
091    protected static final int              IMAGELINK     = 8;
092    protected static final int              IMAGEWIKILINK = 9;
093    protected static final int              ATTACHMENT    = 10;
094
095    private static final Logger LOG = LogManager.getLogger( JSPWikiMarkupParser.class );
096
097    private boolean        m_isbold;
098    private boolean        m_isitalic;
099    private boolean        m_istable;
100    private boolean        m_isPre;
101    private boolean        m_isEscaping;
102    private boolean        m_isdefinition;
103    private boolean        m_isPreBlock;
104
105    /** Contains style information, in multiple forms. */
106    private final Stack< Boolean > m_styleStack = new Stack<>();
107
108     // general list handling
109    private int m_genlistlevel;
110    private final StringBuilder m_genlistBulletBuffer = new StringBuilder( 10 );  // stores the # and * pattern
111    private final boolean m_allowPHPWikiStyleLists = true;
112
113    private boolean m_isOpenParagraph;
114
115    /** Parser for extended link functionality. */
116    private final LinkParser m_linkParser = new LinkParser();
117
118    /** Keeps track of any plain text that gets put in the Text nodes */
119    private StringBuilder m_plainTextBuf = new StringBuilder( 20 );
120
121    private Element m_currentElement;
122
123    /** Keep track of duplicate header names.  */
124    private final Map< String, Integer > m_titleSectionCounter = new HashMap<>();
125
126    /** If true, then considers CamelCase links as well. */
127    private boolean m_camelCaseLinks;
128
129    /** If true, then generate special output for wysiwyg editing in certain cases */
130    private boolean m_wysiwygEditorMode;
131
132    /** If true, consider URIs that have no brackets as well. */
133    // FIXME: Currently reserved, but not used.
134    private boolean m_plainUris;
135
136    /** If true, all outward links use a small link image. */
137    private boolean m_useOutlinkImage = true;
138
139    private boolean m_useAttachmentImage = true;
140
141    /** If true, allows raw HTML. */
142    private boolean m_allowHTML;
143
144    private boolean m_useRelNofollow;
145
146    private final PatternCompiler m_compiler = new Perl5Compiler();
147
148    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";
149
150    private final PatternMatcher m_camelCaseMatcher = new Perl5Matcher();
151    private Pattern m_camelCasePattern;
152
153    private int m_rowNum = 1;
154
155    private Heading m_lastHeading;
156
157    private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";
158
159    /**
160     *  Creates a markup parser.
161     *
162     *  @param context The WikiContext which controls the parsing
163     *  @param in Where the data is read from.
164     */
165    public JSPWikiMarkupParser( final Context context, final Reader in ) {
166        super( context, in );
167        initialize();
168    }
169
170    // FIXME: parsers should be pooled for better performance.
171    private void initialize() {
172        initInlineImagePatterns();
173
174        m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN );
175        if( m_camelCasePattern == null ) {
176            try {
177                m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK );
178            } catch( final MalformedPatternException e ) {
179                LOG.fatal("Internal error: Someone put in a faulty pattern.",e);
180                throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e);
181            }
182            m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern );
183        }
184
185        //  Set the properties.
186        final Properties props = m_engine.getWikiProperties();
187        final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS );
188
189        if( cclinks != null ) {
190            m_camelCaseLinks = TextUtil.isPositive( cclinks );
191        } else {
192            m_camelCaseLinks  = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks );
193        }
194
195        final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE );
196        if( wysiwygVariable != null ) {
197            m_wysiwygEditorMode = wysiwygVariable;
198        }
199
200        m_plainUris          = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris );
201        m_useOutlinkImage    = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage );
202        m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage );
203        m_allowHTML          = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML );
204        m_useRelNofollow     = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow );
205
206        if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) {
207            disableAccessRules();
208        }
209
210        m_context.getPage().setHasMetadata();
211    }
212
213    /**
214     *  Calls a transmutator chain.
215     *
216     *  @param list Chain to call
217     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
218     *  @return The result of the mutation.
219     */
220    protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) {
221        if( list == null || list.isEmpty()) {
222            return text;
223        }
224
225        for( final StringTransmutator m : list ) {
226            text = m.mutate( m_context, text );
227        }
228
229        return text;
230    }
231
232    /**
233     * Calls the heading listeners.
234     *
235     * @param param A Heading object.
236     */
237    private void callHeadingListenerChain( final Heading param ) {
238        for( final HeadingListener h : m_headingListenerChain ) {
239            h.headingAdded( m_context, param );
240        }
241    }
242
243    /**
244     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation, if you really know what you are doing.
245     *
246     *  @param type One of the types above
247     *  @param link URL to which to link to
248     *  @param text Link text
249     *  @param section If a particular section identifier is required.
250     *  @return An 'A' element.
251     *  @since 2.4.78
252     */
253    private Element createAnchor( final int type, final String link, String text, String section ) {
254        text = TextUtil.escapeHTMLEntities( text );
255        section = TextUtil.escapeHTMLEntities( section );
256        final Element el = new Element( "a" );
257        el.setAttribute( "class", CLASS_TYPES[ type ] );
258        el.setAttribute( "href", link + section );
259        el.addContent( text );
260        return el;
261    }
262
263    private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) {
264        Element el = null;
265        if( text == null ) {
266            text = link;
267        }
268        text = callMutatorChain( m_linkMutators, text );
269        section = (section != null) ? ("#"+section) : "";
270
271        // Make sure we make a link name that can be accepted  as a valid URL.
272        if( link.isEmpty() ) {
273            type = EMPTY;
274        }
275        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
276
277        switch( type ) {
278            case READ:
279                el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section );
280                break;
281
282            case EDIT:
283                el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" );
284                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
285                break;
286
287            case EMPTY:
288                el = new Element("u").addContent(text);
289                break;
290
291            // These two are for local references - footnotes and references to footnotes.
292            // We embed the page name (or whatever WikiContext gives us) to make sure the links are unique across Wiki.
293            case LOCALREF:
294                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
295                break;
296
297            case LOCAL:
298                el = new Element( "a" ).setAttribute( "class", CLASS_FOOTNOTE );
299                el.setAttribute( "name", "ref-" + m_context.getName() + "-" + link.substring( 1 ) );
300                if( !m_allowHTML ) {
301                    el.addContent( "[" + TextUtil.escapeHTMLEntities( text ) + "]" );
302                } else {
303                    el.addContent( "[" + text + "]" );
304                }
305                break;
306
307                //  With the image, external and interwiki types we need to make sure nobody can put in Javascript or
308                //  something else annoying into the links themselves.  We do this by preventing a haxor from stopping
309                //  the link name short with quotes in fillBuffer().
310            case IMAGE:
311                el = new Element( "img" ).setAttribute( "class", "inline" );
312                el.setAttribute( "src", link );
313                el.setAttribute( "alt", text );
314                break;
315
316            case IMAGELINK:
317                el = new Element( "img" ).setAttribute( "class", "inline" );
318                el.setAttribute( "src", link );
319                el.setAttribute( "alt", text );
320                el = createAnchor( IMAGELINK, text, "", "" ).addContent( el );
321                break;
322
323            case IMAGEWIKILINK:
324                final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text );
325                el = new Element( "img" ).setAttribute( "class", "inline" );
326                el.setAttribute( "src", link );
327                el.setAttribute( "alt", text );
328                el = createAnchor( IMAGEWIKILINK, pagelink, "", "" ).addContent( el );
329                break;
330
331            case EXTERNAL:
332                el = createAnchor( EXTERNAL, link, text, section );
333                if( m_useRelNofollow ) {
334                    el.setAttribute( "rel", "nofollow" );
335                }
336                break;
337
338            case INTERWIKI:
339                el = createAnchor( INTERWIKI, link, text, section );
340                break;
341
342            case ATTACHMENT:
343                final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link );
344                final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link );
345                final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" );
346                el = createAnchor( ATTACHMENT, attlink, text, "" );
347                if(  m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) {
348                    el.setAttribute("download", "");
349                }
350
351                pushElement( el );
352                popElement( el.getName() );
353
354                if( m_useAttachmentImage ) {
355                    el = new Element( "img" ).setAttribute( "src", imglink );
356                    el.setAttribute( "border", "0" );
357                    el.setAttribute( "alt", "(info)" );
358
359                    el = new Element( "a" ).setAttribute( "href", infolink ).addContent( el );
360                    el.setAttribute( "class", "infolink" );
361                } else {
362                    el = null;
363                }
364                break;
365
366            default:
367                break;
368        }
369
370        if( el != null && attributes != null ) {
371            while( attributes.hasNext() ) {
372                final Attribute attr = attributes.next();
373                if( attr != null ) {
374                    el.setAttribute( attr );
375                }
376            }
377        }
378
379        if( el != null ) {
380            flushPlainText();
381            m_currentElement.addContent( el );
382        }
383        return el;
384    }
385
386    /**
387     *  These are all the HTML 4.01 block-level elements.
388     */
389    private static final String[] BLOCK_ELEMENTS = {
390        "address", "blockquote", "div", "dl", "fieldset", "form",
391        "h1", "h2", "h3", "h4", "h5", "h6",
392        "hr", "noscript", "ol", "p", "pre", "table", "ul"
393    };
394
395    private static boolean isBlockLevel( final String name ) {
396        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
397    }
398
399    /**
400     *  This method peeks ahead in the stream until EOL and returns the result. It will keep the buffers untouched.
401     *
402     *  @return The string from the current position to the end of line.
403     */
404    // FIXME: Always returns an empty line, even if the stream is full.
405    private String peekAheadLine() throws IOException {
406        final String s = readUntilEOL().toString();
407        if( s.length() > PUSHBACK_BUFFER_SIZE ) {
408            LOG.warn( "Line is longer than maximum allowed size (" + PUSHBACK_BUFFER_SIZE + " characters.  Attempting to recover..." );
409            pushBack( s.substring( 0, PUSHBACK_BUFFER_SIZE - 1 ) );
410        } else {
411            try {
412                pushBack( s );
413            } catch( final IOException e ) {
414                LOG.warn( "Pushback failed: the line is probably too long.  Attempting to recover." );
415            }
416        }
417        return s;
418    }
419
420    private int flushPlainText() {
421        final int numChars = m_plainTextBuf.length();
422        if( numChars > 0 ) {
423            String buf;
424
425            if( !m_allowHTML ) {
426                buf = TextUtil.escapeHTMLEntities( m_plainTextBuf.toString() );
427            } else {
428                buf = m_plainTextBuf.toString();
429            }
430            //  We must first empty the buffer because the side effect of calling makeCamelCaseLink() is to call this routine.
431            m_plainTextBuf = new StringBuilder(20);
432            try {
433                // This is the heaviest part of parsing, and therefore we can do some optimization here.
434                // 1) Only when the length of the buffer is big enough, we try to do the match
435                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) {
436                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) {
437                        final MatchResult result = m_camelCaseMatcher.getMatch();
438                        final String firstPart = buf.substring( 0, result.beginOffset( 0 ) );
439                        String prefix = result.group( 1 );
440                        if( prefix == null ) {
441                            prefix = "";
442                        }
443
444                        final String camelCase = result.group(2);
445                        final String protocol  = result.group(3);
446                        String uri       = protocol+result.group(4);
447                        buf              = buf.substring(result.endOffset(0));
448
449                        m_currentElement.addContent( firstPart );
450                        //  Check if the user does not wish to do URL or WikiWord expansion
451                        if( prefix.endsWith( "~" ) || prefix.indexOf( '[' ) != -1 ) {
452                            if( prefix.endsWith( "~" ) ) {
453                                if( m_wysiwygEditorMode ) {
454                                    m_currentElement.addContent( "~" );
455                                }
456                                prefix = prefix.substring( 0, prefix.length() - 1 );
457                            }
458                            if( camelCase != null ) {
459                                m_currentElement.addContent( prefix + camelCase );
460                            } else if( protocol != null ) {
461                                m_currentElement.addContent( prefix + uri );
462                            }
463                            continue;
464                        }
465
466                        // Fine, then let's check what kind of link this was and emit the proper elements
467                        if( protocol != null ) {
468                            final char c = uri.charAt( uri.length() - 1 );
469                            if( c == '.' || c == ',' ) {
470                                uri = uri.substring( 0, uri.length() - 1 );
471                                buf = c + buf;
472                            }
473                            // System.out.println("URI match "+uri);
474                            m_currentElement.addContent( prefix );
475                            makeDirectURILink( uri );
476                        } else {
477                            // System.out.println("Matched: '"+camelCase+"'");
478                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
479                            // System.out.println("prefix="+prefix);
480                            m_currentElement.addContent( prefix );
481                            makeCamelCaseLink( camelCase );
482                        }
483                    }
484                    m_currentElement.addContent( buf );
485                } else {
486                    //  No camelcase asked for, just add the elements
487                    m_currentElement.addContent( buf );
488                }
489            } catch( final IllegalDataException e ) {
490                // Sometimes it's possible that illegal XML chars is added to the data. Here we make sure it does not stop parsing.
491                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
492            }
493        }
494
495        return numChars;
496    }
497
498    private Element pushElement( final Element e ) {
499        flushPlainText();
500        m_currentElement.addContent( e );
501        m_currentElement = e;
502
503        return e;
504    }
505
506    private Element addElement( final Content e ) {
507        if( e != null ) {
508            flushPlainText();
509            m_currentElement.addContent( e );
510        }
511        return m_currentElement;
512    }
513
514    /**
515     *  All elements that can be empty by the HTML DTD.
516     */
517    //  Keep sorted.
518    private static final String[] EMPTY_ELEMENTS = {
519        "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
520    };
521
522    /**
523     *  Goes through the current element stack and pops all elements until this
524     *  element is found - this essentially "closes" and element.
525     *
526     *  @param s element to be found.
527     *  @return The new current element, or null, if there was no such element in the entire stack.
528     */
529    private Element popElement( final String s ) {
530        final int flushedBytes = flushPlainText();
531        Element currEl = m_currentElement;
532        while( currEl.getParentElement() != null ) {
533            if( currEl.getName().equals( s ) && !currEl.isRootElement() ) {
534                m_currentElement = currEl.getParentElement();
535
536                //  Check if it's okay for this element to be empty.  Then we will
537                //  trick the JDOM generator into not generating an empty element,
538                //  by putting an empty string between the tags.  Yes, it's a kludge
539                //  but what'cha gonna do about it. :-)
540                if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) {
541                    currEl.addContent( "" );
542                }
543                return m_currentElement;
544            }
545            currEl = currEl.getParentElement();
546        }
547        return null;
548    }
549
550
551    /**
552     * Reads the stream until it meets one of the specified ending characters, or stream end. The ending
553     * character will be left in the stream.
554     */
555    private String readUntil( final String endChars ) throws IOException {
556        final StringBuilder sb = new StringBuilder( 80 );
557        int ch = nextToken();
558        while( ch != -1 ) {
559            if( ch == '\\' ) {
560                ch = nextToken();
561                if( ch == -1 ) {
562                    break;
563                }
564            } else {
565                if( endChars.indexOf( ( char )ch ) != -1 ) {
566                    pushBack( ch );
567                    break;
568                }
569            }
570            sb.append( ( char )ch );
571            ch = nextToken();
572        }
573
574        return sb.toString();
575    }
576
577    /**
578     *  Reads the stream while the characters that have been specified are
579     *  in the stream, returning then the result as a String.
580     */
581    private String readWhile( final String endChars ) throws IOException {
582        final StringBuilder sb = new StringBuilder( 80 );
583        int ch = nextToken();
584        while( ch != -1 ) {
585            if( endChars.indexOf( ( char ) ch ) == -1 ) {
586                pushBack( ch );
587                break;
588            }
589            sb.append( ( char ) ch );
590            ch = nextToken();
591        }
592
593        return sb.toString();
594    }
595
596    private JSPWikiMarkupParser m_cleanTranslator;
597
598    /**
599     *  Does a lazy init.  Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before
600     *  the TranslatorReader it is contained by is up.
601     */
602    private JSPWikiMarkupParser getCleanTranslator() {
603        if( m_cleanTranslator == null ) {
604            final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() );
605            m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null );
606            m_cleanTranslator.m_allowHTML = true;
607        }
608
609        return m_cleanTranslator;
610    }
611
612    /**
613     *  Modifies the "hd" parameter to contain proper values.  Because
614     *  an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
615     *  % after url encoding with '_'.
616     *  <p>
617     *  Counts also duplicate headings (= headings with similar name), and
618     *  attaches a counter.
619     */
620    protected String makeHeadingAnchor( final String baseName, String title, final Heading hd ) {
621        hd.m_titleText = title;
622        title = MarkupParser.wikifyLink( title );
623        hd.m_titleSection = m_engine.encodeName(title);
624        if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) {
625            final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1;
626            m_titleSectionCounter.put( hd.m_titleSection, count );
627            hd.m_titleSection += "-" + count;
628        } else {
629            m_titleSectionCounter.put( hd.m_titleSection, 1 );
630        }
631
632        hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection;
633        hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' );
634        hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' );
635
636        return hd.m_titleAnchor;
637    }
638
639    private String makeSectionTitle( String title ) {
640        title = title.trim();
641        try {
642            final JSPWikiMarkupParser dtr = getCleanTranslator();
643            dtr.setInputReader( new StringReader( title ) );
644            final WikiDocument doc = dtr.parse();
645            doc.setContext( m_context );
646
647            return XmlUtil.extractTextFromDocument( doc );
648        } catch( final IOException e ) {
649            LOG.fatal("Title parsing not working", e );
650            throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e );
651        }
652    }
653
654    /**
655     *  Returns XHTML for the heading.
656     *
657     *  @param level The level of the heading.  @see Heading
658     *  @param title the title for the heading
659     *  @param hd a List to which heading should be added
660     *  @return An Element containing the heading
661     */
662    public Element makeHeading( final int level, final String title, final Heading hd ) {
663        final Element el;
664        final String pageName = m_context.getPage().getName();
665        final String outTitle = makeSectionTitle( title );
666        hd.m_level = level;
667
668        switch( level ) {
669          case Heading.HEADING_SMALL:
670            el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
671            break;
672
673          case Heading.HEADING_MEDIUM:
674            el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
675            break;
676
677          case Heading.HEADING_LARGE:
678            el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) );
679            break;
680
681          default:
682            throw new InternalWikiException( "Illegal heading type " + level );
683        }
684
685        return el;
686    }
687
688    /**
689     *  When given a link to a WikiName, we just return a proper HTML link for it.  The local link mutator
690     *  chain is also called.
691     */
692    private Element makeCamelCaseLink( final String wikiname ) {
693        final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname );
694        callMutatorChain( m_localLinkMutatorChain, wikiname );
695        if( matchedLink != null ) {
696            makeLink( READ, matchedLink, wikiname, null, null );
697        } else {
698            makeLink( EDIT, wikiname, wikiname, null, null );
699        }
700
701        return m_currentElement;
702    }
703
704    /** Holds the image URL for the duration of this parser */
705    private String m_outlinkImageURL;
706
707    /**
708     * Returns an element for the external link image (out.png).  However, this method caches the URL for the lifetime
709     * of this MarkupParser, because it's commonly used, and we'll end up with possibly hundreds our thousands of
710     * references to it...  It's a lot faster, too.
711     *
712     * @return  An element containing the HTML for the outlink image.
713     */
714    private Element outlinkImage() {
715        Element el = null;
716        if( m_useOutlinkImage ) {
717            if( m_outlinkImageURL == null ) {
718                m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE );
719            }
720
721            el = new Element( "img" ).setAttribute( "class", OUTLINK );
722            el.setAttribute( "src", m_outlinkImageURL );
723            el.setAttribute( "alt","" );
724        }
725
726        return el;
727    }
728
729    /**
730     *  Takes a URL and turns it into a regular wiki link. Unfortunately, because of the way that flushPlainText()
731     *  works, it already encodes all the XML entities. But so does WikiContext.getURL(), so we
732     *  have to do a reverse-replace here, so that it can again be replaced in makeLink.
733     *  <p>
734     *  What a crappy problem.
735     *
736     * @param url provided url.
737     * @return An anchor Element containing the link.
738     */
739    private Element makeDirectURILink( String url ) {
740        final Element result;
741        String last = null;
742
743        if( url.endsWith( "," ) || url.endsWith( "." ) ) {
744            last = url.substring( url.length() - 1 );
745            url = url.substring( 0, url.length() - 1 );
746        }
747
748        callMutatorChain( m_externalLinkMutatorChain, url );
749
750        if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) {
751            result = handleImageLink( StringUtils.replace( url, "&amp;", "&" ), url, false );
752        } else {
753            result = makeLink( EXTERNAL, StringUtils.replace( url, "&amp;", "&" ), url, null, null );
754            addElement( outlinkImage() );
755        }
756
757        if( last != null ) {
758            m_plainTextBuf.append( last );
759        }
760
761        return result;
762    }
763
764    /**
765     *  Image links are handled differently:
766     *  1. If the text is a WikiName of an existing page, it gets linked.
767     *  2. If the text is an external link, then it is inlined.
768     *  3. Otherwise, it becomes an ALT text.
769     *
770     *  @param reallink The link to the image.
771     *  @param link     Link text portion, may be a link to somewhere else.
772     *  @param hasLinkText If true, then the defined link had a link text available.
773     *                  This means that the link text may be a link to a wiki page,
774     *                  or an external resource.
775     */
776    private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) {
777        final String possiblePage = MarkupParser.cleanLink( link );
778        if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) {
779            return makeLink( IMAGELINK, reallink, link, null, null );
780        } else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) {
781            callMutatorChain( m_localLinkMutatorChain, possiblePage );
782            return makeLink( IMAGEWIKILINK, reallink, link, null, null );
783        } else {
784            return makeLink( IMAGE, reallink, link, null, null );
785        }
786    }
787
788    private Element handleAccessRule( String ruleLine ) {
789        if( m_wysiwygEditorMode ) {
790            m_currentElement.addContent( "[" + ruleLine + "]" );
791        }
792        if( !m_parseAccessRules ) {
793            return m_currentElement;
794        }
795        final Page page = m_context.getRealPage();
796        // UserDatabase db = m_context.getEngine().getUserDatabase();
797
798        if( ruleLine.startsWith( "{" ) ) {
799            ruleLine = ruleLine.substring( 1 );
800        }
801
802        if( ruleLine.endsWith( "}" ) ) {
803            ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 );
804        }
805
806        LOG.debug("page={}, ACL = {}", page.getName(), ruleLine);
807
808        try {
809            final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine );
810            page.setAcl( acl );
811            LOG.debug( acl.toString() );
812        } catch( final WikiSecurityException wse ) {
813            return makeError( wse.getMessage() );
814        }
815
816        return m_currentElement;
817    }
818
819    /**
820     *  Handles metadata setting [{SET foo=bar}]
821     */
822    private Element handleMetadata( final String link ) {
823        if( m_wysiwygEditorMode ) {
824            m_currentElement.addContent( "[" + link + "]" );
825        }
826
827        try {
828            final String args = link.substring( link.indexOf(' '), link.length()-1 );
829            final String name = args.substring( 0, args.indexOf('=') ).trim();
830            String val  = args.substring( args.indexOf('=')+1 ).trim();
831
832            if( val.startsWith("'") ) {
833                val = val.substring( 1 );
834            }
835            if( val.endsWith("'") ) {
836                val = val.substring( 0, val.length()-1 );
837            }
838
839            // LOG.debug("SET name='"+name+"', value='"+val+"'.");
840
841            if( !name.isEmpty() && !val.isEmpty() ) {
842                val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val );
843                m_context.getPage().setAttribute( name, val );
844            }
845        } catch( final Exception e ) {
846            final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
847            return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) );
848        }
849
850        return m_currentElement;
851    }
852
853    /**
854     *  Emits a processing instruction that will disable markup escaping. This is
855     *  very useful if you want to emit HTML directly into the stream.
856     */
857    private void disableOutputEscaping() {
858        addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) );
859    }
860
861    /**
862     *  Gobbles up all hyperlinks that are encased in square brackets.
863     */
864    private Element handleHyperlinks( String linktext, final int pos ) {
865        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
866        final StringBuilder sb = new StringBuilder( linktext.length() + 80 );
867
868        if( m_linkParsingOperations.isAccessRule( linktext ) ) {
869            return handleAccessRule( linktext );
870        }
871
872        if( m_linkParsingOperations.isMetadata( linktext ) ) {
873            return handleMetadata( linktext );
874        }
875
876        if( m_linkParsingOperations.isPluginLink( linktext ) ) {
877            try {
878                final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos );
879
880                // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not.
881                if( pluginContent != null ) {
882                    addElement( pluginContent );
883                    pluginContent.executeParse( m_context );
884                }
885            } catch( final PluginException e ) {
886                LOG.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() );
887                //LOG.info( "Root cause:",e.getRootThrowable() );
888                if( !m_wysiwygEditorMode ) {
889                    final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE );
890                    return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ),
891                                                                        m_context.getRealPage().getWiki(),
892                                                                        m_context.getRealPage().getName(),
893                                                                        e.getMessage() ) ) );
894                }
895            }
896            return m_currentElement;
897        }
898
899        try {
900            final LinkParser.Link link = m_linkParser.parse( linktext );
901            linktext = link.getText();
902            String linkref = link.getReference();
903            //  Yes, we now have the components separated.
904            //  linktext = the text the link should have
905            //  linkref  = the url or page name.
906            //  In many cases these are the same.  [linktext|linkref].
907            if( m_linkParsingOperations.isVariableLink( linktext ) ) {
908                final Content el = new VariableContent( linktext );
909                addElement( el );
910            } else if( m_linkParsingOperations.isExternalLink( linkref ) ) {
911                // It's an external link, out of this Wiki
912                callMutatorChain( m_externalLinkMutatorChain, linkref );
913                if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
914                    handleImageLink( linkref, linktext, link.hasReference() );
915                } else {
916                    makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() );
917                    addElement( outlinkImage() );
918                }
919            } else if( link.isInterwikiLink() ) {
920                // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved.
921
922                // FIXME: There is an interesting issue here:  We probably should
923                //        URLEncode the wikiPage, but we can't since some of the
924                //        Wikis use slashes (/), which won't survive URLEncoding.
925                //        Besides, we don't know which character set the other Wiki
926                //        is using, so you'll have to write the entire name as it appears
927                //        in the URL.  Bugger.
928
929                final String extWiki = link.getExternalWiki();
930                final String wikiPage = link.getExternalWikiPage();
931                if( m_wysiwygEditorMode ) {
932                    makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() );
933                } else {
934                    String urlReference = m_engine.getInterWikiURL( extWiki );
935                    if( urlReference != null ) {
936                        urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage );
937                        urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference );
938
939                        if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) {
940                            handleImageLink( urlReference, linktext, link.hasReference() );
941                        } else {
942                            makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() );
943                        }
944                        if( m_linkParsingOperations.isExternalLink( urlReference ) ) {
945                            addElement( outlinkImage() );
946                        }
947                    } else {
948                        final Object[] args = { TextUtil.escapeHTMLEntities( extWiki ) };
949                        addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) );
950                    }
951                }
952            } else if( linkref.startsWith( "#" ) ) {
953                // It defines a local footnote
954                makeLink( LOCAL, linkref, linktext, null, link.getAttributes() );
955            } else if( TextUtil.isNumber( linkref ) ) {
956                // It defines a reference to a local footnote
957                makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() );
958            } else {
959                final int hashMark;
960
961                // Internal wiki link, but is it an attachment link?
962                String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref );
963                if( attachment != null ) {
964                    callMutatorChain( m_attachmentLinkMutatorChain, attachment );
965                    if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) {
966                        attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment );
967                        sb.append( handleImageLink( attachment, linktext, link.hasReference() ) );
968                    } else {
969                        makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() );
970                    }
971                } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) {
972                    // It's an internal Wiki link, but to a named section
973                    final String namedSection = linkref.substring( hashMark + 1 );
974                    linkref = linkref.substring( 0, hashMark );
975                    linkref = MarkupParser.cleanLink( linkref );
976                    callMutatorChain( m_localLinkMutatorChain, linkref );
977                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
978                    if( matchedLink != null ) {
979                        String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) );
980                        sectref = sectref.replace( '%', '_' );
981                        makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() );
982                    } else {
983                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
984                    }
985                } else {
986                    // It's an internal Wiki link
987                    linkref = MarkupParser.cleanLink( linkref );
988                    callMutatorChain( m_localLinkMutatorChain, linkref );
989                    final String matchedLink = m_linkParsingOperations.linkIfExists( linkref );
990                    if( matchedLink != null ) {
991                        makeLink( READ, matchedLink, linktext, null, link.getAttributes() );
992                    } else {
993                        makeLink( EDIT, linkref, linktext, null, link.getAttributes() );
994                    }
995                }
996            }
997
998        } catch( final ParseException e ) {
999            LOG.info( "Parser failure: ", e );
1000            final Object[] args = { e.getMessage() };
1001            addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) );
1002        }
1003        return m_currentElement;
1004    }
1005
1006    /**
1007     *  Pushes back any string that has been read.  It will obviously be pushed back in a reverse order.
1008     *
1009     *  @since 2.1.77
1010     */
1011    private void pushBack( final String s ) throws IOException {
1012        for( int i = s.length()-1; i >= 0; i-- ) {
1013            pushBack( s.charAt(i) );
1014        }
1015    }
1016
1017    private Element handleBackslash() throws IOException {
1018        final int ch = nextToken();
1019        if( ch == '\\' ) {
1020            final int ch2 = nextToken();
1021            if( ch2 == '\\' ) {
1022                pushElement( new Element( "br" ).setAttribute( "clear", "all" ) );
1023                return popElement( "br" );
1024            }
1025            pushBack( ch2 );
1026            pushElement( new Element( "br" ) );
1027            return popElement( "br" );
1028        }
1029        pushBack( ch );
1030        return null;
1031    }
1032
1033    private Element handleUnderscore() throws IOException {
1034        final int ch = nextToken();
1035        Element el = null;
1036        if( ch == '_' ) {
1037            if( m_isbold ) {
1038                el = popElement( "b" );
1039            } else {
1040                el = pushElement( new Element( "b" ) );
1041            }
1042            m_isbold = !m_isbold;
1043        } else {
1044            pushBack( ch );
1045        }
1046
1047        return el;
1048    }
1049
1050
1051    /**
1052     *  For example: italics.
1053     */
1054    private Element handleApostrophe() throws IOException {
1055        final int ch = nextToken();
1056        Element el = null;
1057
1058        if( ch == '\'' ) {
1059            if( m_isitalic ) {
1060                el = popElement( "i" );
1061            } else {
1062                el = pushElement( new Element( "i" ) );
1063            }
1064            m_isitalic = !m_isitalic;
1065        } else {
1066            pushBack( ch );
1067        }
1068
1069        return el;
1070    }
1071
1072    private Element handleOpenbrace( final boolean isBlock ) throws IOException {
1073        final int ch = nextToken();
1074        if( ch == '{' ) {
1075            final int ch2 = nextToken();
1076            if( ch2 == '{' ) {
1077                m_isPre = true;
1078                m_isEscaping = true;
1079                m_isPreBlock = isBlock;
1080                if( isBlock ) {
1081                    startBlockLevel();
1082                    return pushElement( new Element( "pre" ) );
1083                }
1084
1085                return pushElement( new Element( "span" ).setAttribute( "class", "inline-code" ) );
1086            }
1087            pushBack( ch2 );
1088            return pushElement( new Element( "tt" ) );
1089        }
1090        pushBack( ch );
1091        return null;
1092    }
1093
1094    /**
1095     *  Handles both }} and }}}
1096     */
1097    private Element handleClosebrace() throws IOException {
1098        final int ch2 = nextToken();
1099        if( ch2 == '}' ) {
1100            final int ch3 = nextToken();
1101            if( ch3 == '}' ) {
1102                if( m_isPre ) {
1103                    if( m_isPreBlock ) {
1104                        popElement( "pre" );
1105                    } else {
1106                        popElement( "span" );
1107                    }
1108                    m_isPre = false;
1109                    m_isEscaping = false;
1110                    return m_currentElement;
1111                }
1112                m_plainTextBuf.append( "}}}" );
1113                return m_currentElement;
1114            }
1115            pushBack( ch3 );
1116            if( !m_isEscaping ) {
1117                return popElement( "tt" );
1118            }
1119        }
1120        pushBack( ch2 );
1121        return null;
1122    }
1123
1124    private Element handleDash() throws IOException {
1125        int ch = nextToken();
1126        if( ch == '-' ) {
1127            final int ch2 = nextToken();
1128            if( ch2 == '-' ) {
1129                final int ch3 = nextToken();
1130                if( ch3 == '-' ) {
1131                    // Empty away all the rest of the dashes.
1132                    // Do not forget to return the first non-match back.
1133                    do {
1134                        ch = nextToken();
1135                    } while ( ch == '-' );
1136
1137                    pushBack( ch );
1138                    startBlockLevel();
1139                    pushElement( new Element( "hr" ) );
1140                    return popElement( "hr" );
1141                }
1142                pushBack( ch3 );
1143            }
1144            pushBack( ch2 );
1145        }
1146        pushBack( ch );
1147        return null;
1148    }
1149
1150    private Element handleHeading() throws IOException {
1151        final Element el;
1152        final int ch  = nextToken();
1153        final Heading hd = new Heading();
1154        if( ch == '!' ) {
1155            final int ch2 = nextToken();
1156            if( ch2 == '!' ) {
1157                final String title = peekAheadLine();
1158                el = makeHeading( Heading.HEADING_LARGE, title, hd );
1159            } else {
1160                pushBack( ch2 );
1161                final String title = peekAheadLine();
1162                el = makeHeading( Heading.HEADING_MEDIUM, title, hd );
1163            }
1164        } else {
1165            pushBack( ch );
1166            final String title = peekAheadLine();
1167            el = makeHeading( Heading.HEADING_SMALL, title, hd );
1168        }
1169
1170        callHeadingListenerChain( hd );
1171        m_lastHeading = hd;
1172        if( el != null ) {
1173            pushElement( el );
1174        }
1175        return el;
1176    }
1177
1178    /**
1179     * Reads the stream until the next EOL or EOF.  Note that it will also read the EOL from the stream.
1180     */
1181    private StringBuilder readUntilEOL() throws IOException {
1182        int ch;
1183        final StringBuilder buf = new StringBuilder( 256 );
1184        while( true ) {
1185            ch = nextToken();
1186            if( ch == -1 ) {
1187                break;
1188            }
1189            buf.append( (char) ch );
1190            if( ch == '\n' ) {
1191                break;
1192            }
1193        }
1194        return buf;
1195    }
1196
1197    /** Controls whether italic is restarted after a paragraph shift */
1198
1199    private boolean m_restartitalic;
1200    private boolean m_restartbold;
1201
1202    private boolean m_newLine;
1203
1204    /**
1205     * Starts a block level element, therefore closing a potential open paragraph tag.
1206     */
1207    private void startBlockLevel() {
1208        // These may not continue over block level limits in XHTML
1209        popElement( "i" );
1210        popElement( "b" );
1211        popElement( "tt" );
1212        if( m_isOpenParagraph ) {
1213            m_isOpenParagraph = false;
1214            popElement( "p" );
1215            m_plainTextBuf.append( "\n" ); // Just small beautification
1216        }
1217        m_restartitalic = m_isitalic;
1218        m_restartbold   = m_isbold;
1219        m_isitalic = false;
1220        m_isbold   = false;
1221    }
1222
1223    private static String getListType( final char c ) {
1224        if( c == '*' ) {
1225            return "ul";
1226        } else if( c == '#' ) {
1227            return "ol";
1228        }
1229        throw new InternalWikiException( "Parser got faulty list type: " + c );
1230    }
1231    /**
1232     * Like original handleOrderedList() and handleUnorderedList(),
1233     * however handles both ordered ('#') and unordered ('*') mixed together.
1234     */
1235    // FIXME: Refactor this; it's a bit messy.
1236    private Element handleGeneralList() throws IOException {
1237         startBlockLevel();
1238         String strBullets = readWhile( "*#" );
1239         // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
1240         final int numBullets = strBullets.length();
1241
1242         // override the beginning portion of bullet pattern to be like the previous to simulate PHPWiki style lists
1243
1244        if( m_allowPHPWikiStyleLists ) {
1245            // only substitute if different
1246            if( !( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) ) {
1247                if( numBullets <= m_genlistlevel ) {
1248                    // Substitute all but the last character (keep the expressed bullet preference)
1249                    strBullets = ( numBullets > 1 ? m_genlistBulletBuffer.substring( 0, numBullets - 1 ) : "" ) +
1250                                 strBullets.charAt( numBullets - 1 );
1251                } else {
1252                    strBullets = m_genlistBulletBuffer + strBullets.substring( m_genlistlevel, numBullets );
1253                }
1254            }
1255        }
1256
1257         //  Check if this is still of the same type
1258        if( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) {
1259            if( numBullets > m_genlistlevel ) {
1260                pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel++ ) ) ) );
1261                for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) {
1262                    // bullets are growing, get from new bullet list
1263                    pushElement( new Element( "li" ) );
1264                    pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel ) ) ) );
1265                }
1266            } else if( numBullets < m_genlistlevel ) {
1267                //  Close the previous list item.
1268                popElement( "li" );
1269                for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) {
1270                    // bullets are shrinking, get from old bullet list
1271                    popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1272                    if( m_genlistlevel > 0 ) {
1273                        popElement( "li" );
1274                    }
1275                }
1276            } else {
1277                if( m_genlistlevel > 0 ) {
1278                    popElement( "li" );
1279                }
1280            }
1281        } else {
1282            //  The pattern has changed, unwind and restart
1283            int numEqualBullets;
1284            final int numCheckBullets;
1285
1286            // find out how much is the same
1287            numEqualBullets = 0;
1288            numCheckBullets = Math.min( numBullets, m_genlistlevel );
1289
1290            while( numEqualBullets < numCheckBullets ) {
1291                // if the bullets are equal so far, keep going
1292                if( strBullets.charAt( numEqualBullets ) == m_genlistBulletBuffer.charAt( numEqualBullets ) )
1293                    numEqualBullets++;
1294                    // otherwise giveup, we have found how many are equal
1295                else
1296                    break;
1297            }
1298
1299            //unwind
1300            for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) {
1301                popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1302                if( m_genlistlevel > numBullets ) {
1303                    popElement( "li" );
1304                }
1305            }
1306
1307            //rewind
1308            pushElement( new Element( getListType( strBullets.charAt( numEqualBullets++ ) ) ) );
1309            for( int i = numEqualBullets; i < numBullets; i++ ) {
1310                pushElement( new Element( "li" ) );
1311                pushElement( new Element( getListType( strBullets.charAt( i ) ) ) );
1312            }
1313            m_genlistlevel = numBullets;
1314        }
1315
1316         // Push a new list item, and eat away any extra whitespace
1317        pushElement( new Element( "li" ) );
1318        readWhile( " " );
1319
1320        // work done, remember the new bullet list (in place of old one)
1321        m_genlistBulletBuffer.setLength( 0 );
1322        m_genlistBulletBuffer.append( strBullets );
1323        return m_currentElement;
1324    }
1325
1326    private Element unwindGeneralList() {
1327        // unwind
1328        for( ; m_genlistlevel > 0; m_genlistlevel-- ) {
1329            popElement( "li" );
1330            popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
1331        }
1332        m_genlistBulletBuffer.setLength( 0 );
1333        return null;
1334    }
1335
1336
1337    private Element handleDefinitionList() {
1338        if( !m_isdefinition ) {
1339            m_isdefinition = true;
1340            startBlockLevel();
1341            pushElement( new Element( "dl" ) );
1342            return pushElement( new Element( "dt" ) );
1343        }
1344        return null;
1345    }
1346
1347    private Element handleOpenbracket() throws IOException {
1348        final StringBuilder sb = new StringBuilder( 40 );
1349        final int pos = getPosition();
1350        int ch = nextToken();
1351        boolean isPlugin = false;
1352        if( ch == '[' ) {
1353            if( m_wysiwygEditorMode ) {
1354                sb.append( '[' );
1355            }
1356            sb.append( ( char )ch );
1357            while( ( ch = nextToken() ) == '[' ) {
1358                sb.append( ( char )ch );
1359            }
1360        }
1361
1362        if( ch == '{' ) {
1363            isPlugin = true;
1364        }
1365
1366        pushBack( ch );
1367
1368        if( sb.length() > 0 ) {
1369            m_plainTextBuf.append( sb );
1370            return m_currentElement;
1371        }
1372
1373        //  Find end of hyperlink
1374        ch = nextToken();
1375        int nesting = 1; // Check for nested plugins
1376        while( ch != -1 ) {
1377            final int ch2 = nextToken();
1378            pushBack( ch2 );
1379            if( isPlugin ) {
1380                if( ch == '[' && ch2 == '{' ) {
1381                    nesting++;
1382                } else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) {
1383                    break;
1384                } else if( ch == '}' && ch2 == ']' ) {
1385                    // NB: This will be decremented once at the end
1386                    nesting--;
1387                }
1388            } else {
1389                if( ch == ']' ) {
1390                    break;
1391                }
1392            }
1393
1394            sb.append( (char) ch );
1395
1396            ch = nextToken();
1397        }
1398
1399        //  If the link is never finished, do some tricks to display the rest of the line unchanged.
1400        if( ch == -1 ) {
1401            LOG.debug( "Warning: unterminated link detected!" );
1402            m_isEscaping = true;
1403            m_plainTextBuf.append( sb );
1404            flushPlainText();
1405            m_isEscaping = false;
1406            return m_currentElement;
1407        }
1408
1409        return handleHyperlinks( sb.toString(), pos );
1410    }
1411
1412    /**
1413     *  Reads the stream until the current brace is closed or stream end.
1414     */
1415    private String readBraceContent( final char opening, final char closing ) throws IOException {
1416        final StringBuilder sb = new StringBuilder( 40 );
1417        int braceLevel = 1;
1418        int ch;
1419        while( ( ch = nextToken() ) != -1 ) {
1420            if( ch == '\\' ) {
1421                continue;
1422            } else if( ch == opening ) {
1423                braceLevel++;
1424            } else if( ch == closing ) {
1425                braceLevel--;
1426                if( braceLevel == 0 ) {
1427                    break;
1428                }
1429            }
1430            sb.append( ( char ) ch );
1431        }
1432        return sb.toString();
1433    }
1434
1435
1436    /**
1437     * Handles constructs of type %%(style) and %%class
1438     * @return An Element containing the div or span, depending on the situation.
1439     * @throws IOException
1440     */
1441    private Element handleDiv( ) throws IOException {
1442        int ch = nextToken();
1443        Element el = null;
1444
1445        if( ch == '%' ) {
1446            String style = null;
1447            String clazz = null;
1448
1449            ch = nextToken();
1450
1451            //  Style or class?
1452            if( ch == '(' ) {
1453                style = readBraceContent('(',')');
1454            } else if( Character.isLetter( (char) ch ) ) {
1455                pushBack( ch );
1456                clazz = readUntil( "( \t\n\r" );
1457                //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters
1458                //CSS Classnames can contain only the characters [a-zA-Z0-9] and
1459                //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_".
1460                //They cannot start with a digit, two hyphens, or a hyphen followed by a digit.
1461
1462                //(1) replace '.' by spaces, allowing multiple classnames on a div or span
1463                //(2) remove any invalid character
1464                if( clazz != null ) {
1465                    clazz = clazz.replace( '.', ' ' )
1466                                 .replaceAll( "[^\\s-_\\w\\x200-\\x377]+", "" );
1467                }
1468                ch = nextToken();
1469
1470                // check for %%class1.class2( style information )
1471                if( ch == '(' ) {
1472                    style = readBraceContent( '(', ')' );
1473                //  Pop out only spaces, so that the upcoming EOL check does not check the next line.
1474                } else if( ch == '\n' || ch == '\r' ) {
1475                    pushBack( ch );
1476                }
1477            } else {
1478                // Anything else stops.
1479                pushBack( ch );
1480                try {
1481                    final Boolean isSpan = m_styleStack.pop();
1482                    if( isSpan == null ) {
1483                        // Fail quietly
1484                    } else if( isSpan ) {
1485                        el = popElement( "span" );
1486                    } else {
1487                        el = popElement( "div" );
1488                    }
1489                } catch( final EmptyStackException e ) {
1490                    LOG.debug( "Page '" + m_context.getName() + "' closes a %%-block that has not been opened." );
1491                    return m_currentElement;
1492                }
1493                return el;
1494            }
1495
1496            //  Check if there is an attempt to do something nasty
1497            try {
1498                style = StringEscapeUtils.unescapeHtml4(style);
1499                if( style != null && style.contains( "javascript:" ) ) {
1500                    LOG.debug( "Attempt to output javascript within CSS: {}", style );
1501                    final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1502                    return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) );
1503                }
1504            } catch( final NumberFormatException e ) {
1505                //  If there are unknown entities, we don't want the parser to stop.
1506                final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );
1507                final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() );
1508                return addElement( makeError( msg ) );
1509            }
1510
1511            //  Decide if we should open a div or a span?
1512            final String eol = peekAheadLine();
1513
1514            if( !eol.trim().isEmpty() ) {
1515                // There is stuff after the class
1516                el = new Element("span");
1517                m_styleStack.push( Boolean.TRUE );
1518            } else {
1519                startBlockLevel();
1520                el = new Element("div");
1521                m_styleStack.push( Boolean.FALSE );
1522            }
1523
1524            if( style != null ) el.setAttribute("style", style);
1525            if( clazz != null ) el.setAttribute("class", clazz);
1526            return pushElement( el );
1527        }
1528        pushBack( ch );
1529        return el;
1530    }
1531
1532    private Element handleSlash( ) throws IOException {
1533        final int ch = nextToken();
1534        pushBack( ch );
1535        if( ch == '%' && !m_styleStack.isEmpty() ) {
1536            return handleDiv();
1537        }
1538
1539        return null;
1540    }
1541
1542    private Element handleBar( final boolean newLine ) throws IOException {
1543        Element el;
1544        if( !m_istable && !newLine ) {
1545            return null;
1546        }
1547
1548        //  If the bar is in the first column, we will either start a new table or continue the old one.
1549        if( newLine ) {
1550            if( !m_istable ) {
1551                startBlockLevel();
1552                el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") );
1553                m_istable = true;
1554                m_rowNum = 0;
1555            }
1556
1557            m_rowNum++;
1558            final Element tr = ( m_rowNum % 2 != 0 )
1559                       ? new Element("tr").setAttribute("class", "odd")
1560                       : new Element("tr");
1561            el = pushElement( tr );
1562        }
1563
1564        //  Check out which table cell element to start; a header element (th) or a regular element (td).
1565        final int ch = nextToken();
1566        if( ch == '|' ) {
1567            if( !newLine ) {
1568                el = popElement("th");
1569                if( el == null ) popElement("td");
1570            }
1571            el = pushElement( new Element("th") );
1572        } else {
1573            if( !newLine ) {
1574                el = popElement( "td" );
1575                if( el == null ) popElement( "th" );
1576            }
1577            el = pushElement( new Element("td") );
1578            pushBack( ch );
1579        }
1580        return el;
1581    }
1582
1583    /**
1584     *  Generic escape of next character or entity.
1585     */
1586    private Element handleTilde() throws IOException {
1587        final int ch = nextToken();
1588
1589        if( ch == ' ' ) {
1590            if( m_wysiwygEditorMode ) {
1591                m_plainTextBuf.append( "~ " );
1592            }
1593            return m_currentElement;
1594        }
1595
1596        if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' ||
1597            ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' ||
1598            ch == '{' || ch == ']' || ch == '}' || ch == '%' ) {
1599            if( m_wysiwygEditorMode ) {
1600                m_plainTextBuf.append( '~' );
1601            }
1602            m_plainTextBuf.append( ( char ) ch );
1603            m_plainTextBuf.append( readWhile( "" + ( char ) ch ) );
1604            return m_currentElement;
1605        }
1606        // No escape.
1607        pushBack( ch );
1608        return null;
1609    }
1610
1611    private void fillBuffer( final Element startElement ) throws IOException {
1612        m_currentElement = startElement;
1613        m_newLine = true;
1614        boolean quitReading = false;
1615        disableOutputEscaping();
1616        while( !quitReading ) {
1617            final int ch = nextToken();
1618            if( ch == -1 ) {
1619                break;
1620            }
1621
1622            //  Check if we're actually ending the preformatted mode. We still must do an entity transformation here.
1623            if( m_isEscaping ) {
1624                if( ch == '}' ) {
1625                    if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch );
1626                } else if( ch == -1 ) {
1627                    quitReading = true;
1628                }
1629                else if( ch == '\r' ) {
1630                    // DOS line feeds we ignore.
1631                } else if( ch == '<' ) {
1632                    m_plainTextBuf.append( "&lt;" );
1633                } else if( ch == '>' ) {
1634                    m_plainTextBuf.append( "&gt;" );
1635                } else if( ch == '&' ) {
1636                    m_plainTextBuf.append( "&amp;" );
1637                } else if( ch == '~' ) {
1638                    String braces = readWhile( "}" );
1639                    if( braces.length() >= 3 ) {
1640                        m_plainTextBuf.append( "}}}" );
1641                        braces = braces.substring(3);
1642                    } else {
1643                        m_plainTextBuf.append( (char) ch );
1644                    }
1645
1646                    for( int i = braces.length()-1; i >= 0; i-- ) {
1647                        pushBack( braces.charAt( i ) );
1648                    }
1649                } else {
1650                    m_plainTextBuf.append( (char) ch );
1651                }
1652
1653                continue;
1654            }
1655
1656            //  An empty line stops a list
1657            if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) {
1658                m_plainTextBuf.append(unwindGeneralList());
1659            }
1660
1661            if( m_newLine && ch != '|' && m_istable ) {
1662                popElement( "table" );
1663                m_istable = false;
1664            }
1665
1666            int skip = IGNORE;
1667            //  Do the actual parsing and catch any errors.
1668            try {
1669                skip = parseToken( ch );
1670            } catch( final IllegalDataException e ) {
1671                LOG.info( "Page {} contains data which cannot be added to DOM tree: {}", m_context.getPage().getName(), e.getMessage() );
1672                makeError( "Error: " + cleanupSuspectData( e.getMessage() ) );
1673            }
1674
1675            // The idea is as follows:  If the handler method returns an element (el != null), it is assumed that it
1676            // has been added in the stack.  Otherwise, the character is added as is to the plaintext buffer.
1677            //
1678            // For the transition phase, if s != null, it also gets added in the plaintext buffer.
1679            switch( skip ) {
1680                case ELEMENT:
1681                    m_newLine = false;
1682                    break;
1683
1684                case CHARACTER:
1685                    m_plainTextBuf.append( (char) ch );
1686                    m_newLine = false;
1687                    break;
1688
1689                case IGNORE:
1690                default:
1691                    break;
1692            }
1693        }
1694
1695        closeHeadings();
1696        popElement( "domroot" );
1697    }
1698
1699    private String cleanupSuspectData( final String s ) {
1700        final StringBuilder sb = new StringBuilder( s.length() );
1701        for( int i = 0; i < s.length(); i++ ) {
1702            final char c = s.charAt(i);
1703            if( Verifier.isXMLCharacter( c ) ) sb.append( c );
1704            else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() );
1705        }
1706
1707        return sb.toString();
1708    }
1709
    /**
     *  Result code of {@link #parseToken(int)}: the token is a plain character. The parsing loop
     *  appends it to the plain-text buffer and clears the start-of-line flag.
     */
    protected static final int CHARACTER = 0;

    /**
     *  Result code of {@link #parseToken(int)}: the token is a wikimarkup element. The parsing loop
     *  emits nothing for it and clears the start-of-line flag.
     */
    protected static final int ELEMENT   = 1;

    /**
     *  Result code of {@link #parseToken(int)}: the token is to be ignored. The parsing loop emits
     *  nothing and leaves the start-of-line flag as it is.
     */
    protected static final int IGNORE    = 2;
1718
1719    /**
1720     *  Return CHARACTER, if you think this was a plain character; ELEMENT, if
1721     *  you think this was a wiki markup element, and IGNORE, if you think
1722     *  we should ignore this altogether.
1723     *  <p>
1724     *  To add your own MarkupParser, you can override this method, but it
1725     *  is recommended that you call super.parseToken() as well to gain advantage
1726     *  of JSPWiki's own markup.  You can call it at the start of your own
1727     *  parseToken() or end - it does not matter.
1728     *
1729     * @param ch The character under investigation
1730     * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
1731     * @throws IOException If parsing fails.
1732     */
1733    protected int parseToken( final int ch ) throws IOException {
1734        Element el = null;
1735        //  Now, check the incoming token.
1736        switch( ch ) {
1737          case '\r':
1738            // DOS linefeeds we forget
1739            return IGNORE;
1740
1741          case '\n':
1742            //  Close things like headings, etc.
1743            // FIXME: This is not really very fast
1744            closeHeadings();
1745
1746            popElement( "dl" ); // Close definition lists.
1747            if( m_istable ) {
1748                popElement("tr");
1749            }
1750            m_isdefinition = false;
1751            if( m_newLine ) {
1752                // Paragraph change.
1753                startBlockLevel();
1754                //  Figure out which elements cannot be enclosed inside a <p></p> pair according to XHTML rules.
1755                final String nextLine = peekAheadLine();
1756                if( nextLine.isEmpty() ||
1757                     ( !nextLine.isEmpty() &&
1758                       !nextLine.startsWith( "{{{" ) &&
1759                       !nextLine.startsWith( "----" ) &&
1760                       !nextLine.startsWith( "%%" ) &&
1761                       "*#!;".indexOf( nextLine.charAt( 0 ) ) == -1 ) ) {
1762                    pushElement( new Element( "p" ) );
1763                    m_isOpenParagraph = true;
1764
1765                    if( m_restartitalic ) {
1766                        pushElement( new Element( "i" ) );
1767                        m_isitalic = true;
1768                        m_restartitalic = false;
1769                    }
1770                    if( m_restartbold ) {
1771                        pushElement( new Element( "b" ) );
1772                        m_isbold = true;
1773                        m_restartbold = false;
1774                    }
1775                }
1776            } else {
1777                m_plainTextBuf.append("\n");
1778                m_newLine = true;
1779            }
1780            return IGNORE;
1781
1782          case '\\':
1783            el = handleBackslash();
1784            break;
1785
1786          case '_':
1787            el = handleUnderscore();
1788            break;
1789
1790          case '\'':
1791            el = handleApostrophe();
1792            break;
1793
1794          case '{':
1795            el = handleOpenbrace( m_newLine );
1796            break;
1797
1798          case '}':
1799            el = handleClosebrace();
1800            break;
1801
1802          case '-':
1803            if( m_newLine ) {
1804                el = handleDash();
1805            }
1806            break;
1807
1808          case '!':
1809            if( m_newLine ) {
1810                el = handleHeading();
1811            }
1812            break;
1813
1814          case ';':
1815            if( m_newLine ) {
1816                el = handleDefinitionList();
1817            }
1818            break;
1819
1820          case ':':
1821            if( m_isdefinition ) {
1822                popElement( "dt" );
1823                el = pushElement( new Element( "dd" ) );
1824                m_isdefinition = false;
1825            }
1826            break;
1827
1828          case '[':
1829            el = handleOpenbracket();
1830            break;
1831
1832          case '*':
1833            if( m_newLine ) {
1834                pushBack( '*' );
1835                el = handleGeneralList();
1836            }
1837            break;
1838
1839          case '#':
1840            if( m_newLine ) {
1841                pushBack( '#' );
1842                el = handleGeneralList();
1843            }
1844            break;
1845
1846          case '|':
1847            el = handleBar( m_newLine );
1848            break;
1849
1850          case '~':
1851            el = handleTilde();
1852            break;
1853
1854          case '%':
1855            el = handleDiv();
1856            break;
1857
1858          case '/':
1859            el = handleSlash();
1860            break;
1861
1862          default:
1863            break;
1864        }
1865
1866        return el != null ? ELEMENT : CHARACTER;
1867    }
1868
1869    private void closeHeadings() {
1870        if( m_lastHeading != null && !m_wysiwygEditorMode ) {
1871            // Add the hash anchor element at the end of the heading
1872            addElement( new Element("a").setAttribute( "class",HASHLINK )
1873                                              .setAttribute( "href","#" + m_lastHeading.m_titleAnchor )
1874                                              .setText( "#" ) );
1875            m_lastHeading = null;
1876        }
1877        popElement( "h2" );
1878        popElement( "h3" );
1879        popElement( "h4" );
1880    }
1881
1882    /**
1883     *  Parses the entire document from the Reader given in the constructor or set by {@link #setInputReader(Reader)}.
1884     *
1885     *  @return A WikiDocument, ready to be passed to the renderer.
1886     *  @throws IOException If parsing cannot be accomplished.
1887     */
1888    @Override
1889    public WikiDocument parse() throws IOException {
1890        final WikiDocument d = new WikiDocument( m_context.getPage() );
1891        d.setContext( m_context );
1892        final Element rootElement = new Element( "domroot" );
1893        d.setRootElement( rootElement );
1894        fillBuffer( rootElement );
1895        paragraphify( rootElement );
1896
1897        return d;
1898    }
1899
1900    /**
1901     *  Checks out that the first paragraph is correctly installed.
1902     *
1903     *  @param rootElement element to be checked.
1904     */
1905    private void paragraphify( final Element rootElement) {
1906        //  Add the paragraph tag to the first paragraph
1907        final List< Content > kids = rootElement.getContent();
1908        if( rootElement.getChild( "p" ) != null ) {
1909            final ArrayList<Content> ls = new ArrayList<>();
1910            int idxOfFirstContent = 0;
1911            int count = 0;
1912
1913            for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) {
1914                final Content c = i.next();
1915                if( c instanceof Element ) {
1916                    final String name = ( ( Element )c ).getName();
1917                    if( isBlockLevel( name ) ) {
1918                        break;
1919                    }
1920                }
1921
1922                if( !( c instanceof ProcessingInstruction ) ) {
1923                    ls.add( c );
1924                    if( idxOfFirstContent == 0 ) {
1925                        idxOfFirstContent = count;
1926                    }
1927                }
1928            }
1929
1930            //  If there were any elements, then add a new <p> (unless it would be an empty one)
1931            if(!ls.isEmpty()) {
1932                final Element newel = new Element("p");
1933                for( final Content c : ls ) {
1934                    c.detach();
1935                    newel.addContent( c );
1936                }
1937
1938                // Make sure there are no empty <p/> tags added.
1939                if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() ) {
1940                    rootElement.addContent( idxOfFirstContent, newel );
1941                }
1942            }
1943        }
1944    }
1945
1946}