001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018*/
019package org.apache.wiki.parser;
020
021import org.apache.logging.log4j.LogManager;
022import org.apache.logging.log4j.Logger;
023import org.apache.oro.text.GlobCompiler;
024import org.apache.oro.text.regex.MalformedPatternException;
025import org.apache.oro.text.regex.Pattern;
026import org.apache.oro.text.regex.PatternCompiler;
027import org.apache.wiki.StringTransmutator;
028import org.apache.wiki.api.core.Context;
029import org.apache.wiki.api.core.Engine;
030import org.apache.wiki.util.TextUtil;
031import org.jdom2.Element;
032
033import java.io.BufferedReader;
034import java.io.IOException;
035import java.io.PushbackReader;
036import java.io.Reader;
037import java.util.ArrayList;
038import java.util.Collection;
039import java.util.Collections;
040import java.util.List;
041
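/**
 *   Abstract base class for wiki markup parsers.  It holds the pushback input reader and the current read position,
 *   the link hook chains, the heading listeners and the inline image patterns that concrete parsers work with.
 *
 *   @since  2.4
 */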
047public abstract class MarkupParser {
048
049    /** Allow this many characters to be pushed back in the stream.  In effect, this limits the size of a single line.  */
050    protected static final int PUSHBACK_BUFFER_SIZE = 10*1024;
051    protected PushbackReader m_in;
052    private int m_pos = -1; // current position in reader stream
053
054    protected final Engine m_engine;
055    protected final Context m_context;
056
057    /** Optionally stores internal wikilinks */
058    protected final ArrayList< StringTransmutator > m_localLinkMutatorChain = new ArrayList<>();
059    protected final ArrayList< StringTransmutator > m_externalLinkMutatorChain = new ArrayList<>();
060    protected final ArrayList< StringTransmutator > m_attachmentLinkMutatorChain = new ArrayList<>();
061    protected final ArrayList< StringTransmutator > m_linkMutators = new ArrayList<>();
062    protected final ArrayList< HeadingListener > m_headingListenerChain = new ArrayList<>();
063
064    protected boolean m_inlineImages = true;
065    protected boolean m_parseAccessRules = true;
066    /** Keeps image regexp Patterns */
067    protected List< Pattern > m_inlineImagePatterns;
068    protected final LinkParsingOperations m_linkParsingOperations;
069
070    private static final Logger log = LogManager.getLogger( MarkupParser.class );
071
072    /** If set to "true", allows using raw HTML within Wiki text.  Be warned, this is a VERY dangerous option to set -
073       never turn this on in a publicly allowable Wiki, unless you are absolutely certain of what you're doing. */
074    public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";
075
076    /** If set to "true", enables plugins during parsing */
077    public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";
078
079    /** If true, all outward links (external links) have a small link image appended. */
080    public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";
081
082    /** If set to "true", all external links are tagged with 'rel="nofollow"' */
083    public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";
084
085    /** If true, consider CamelCase hyperlinks as well. */
086    public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";
087
088    /** If true, all hyperlinks are translated as well, regardless whether they
089     are surrounded by brackets. */
090    public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";
091
092    /** If true, all outward attachment info links have a small link image appended. */
093    public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";
094
095    public static final String HASHLINK = "hashlink";
096
097    /** Name of the outlink image; relative path to the JSPWiki directory. */
098    public static final String OUTLINK_IMAGE = "images/out.png";
099    /** Outlink css class. */
100    public static final String OUTLINK = "outlink";
101
102    private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";
103
104    /** The value for anchor element <tt>class</tt> attributes when used for wiki page (normal) links. The value is "wikipage". */
105   public static final String CLASS_WIKIPAGE = "wikipage";
106
107   /** The value for anchor element <tt>class</tt> attributes when used for edit page links. The value is "createpage". */
108   public static final String CLASS_EDITPAGE = "createpage";
109
110   /** The value for anchor element <tt>class</tt> attributes when used for interwiki page links. The value is "interwiki". */
111   public static final String CLASS_INTERWIKI = "interwiki";
112
113   /** The value for anchor element <tt>class</tt> attributes when used for footnote links. The value is "footnote". */
114   public static final String CLASS_FOOTNOTE = "footnote";
115
116   /** The value for anchor element <tt>class</tt> attributes when used for footnote links. The value is "footnote". */
117   public static final String CLASS_FOOTNOTE_REF = "footnoteref";
118
119   /** The value for anchor element <tt>class</tt> attributes when used for external links. The value is "external". */
120   public static final String CLASS_EXTERNAL = "external";
121
122   /** The value for anchor element <tt>class</tt> attributes when used for attachments. The value is "attachment". */
123   public static final String CLASS_ATTACHMENT = "attachment";
124
125   public static final String[] CLASS_TYPES = {
126      CLASS_WIKIPAGE,
127      CLASS_EDITPAGE,
128      "",
129      CLASS_FOOTNOTE,
130      CLASS_FOOTNOTE_REF,
131      "",
132      CLASS_EXTERNAL,
133      CLASS_INTERWIKI,
134      CLASS_EXTERNAL,
135      CLASS_WIKIPAGE,
136      CLASS_ATTACHMENT
137   };
138
139    /**
140     *  Constructs a MarkupParser.  The subclass must call this constructor to set up the necessary bits and pieces.
141     *
142     *  @param context The WikiContext.
143     *  @param in The reader from which we are reading the bytes from.
144     */
145    protected MarkupParser( final Context context, final Reader in ) {
146        m_engine = context.getEngine();
147        m_context = context;
148        m_linkParsingOperations = new LinkParsingOperations( m_context );
149        setInputReader( in );
150    }
151
152    /**
153     *  Replaces the current input character stream with a new one.
154     *
155     *  @param in New source for input.  If null, this method does nothing.
156     *  @return the old stream
157     */
158    public Reader setInputReader( final Reader in ) {
159        final Reader old = m_in;
160        if( in != null ) {
161            m_in = new PushbackReader( new BufferedReader( in ), PUSHBACK_BUFFER_SIZE );
162        }
163
164        return old;
165    }
166
167    /**
168     *  Adds a hook for processing link texts.  This hook is called when the link text is written into the output stream, and
169     *  you may use it to modify the text.  It does not affect the actual link, only the user-visible text.
170     *
171     *  @param mutator The hook to call.  Null is safe.
172     */
173    public void addLinkTransmutator( final StringTransmutator mutator ) {
174        addLinkHook( m_linkMutators, mutator );
175    }
176
177    /**
178     *  Adds a hook for processing local links.  The engine transforms both non-existing and existing page links.
179     *
180     *  @param mutator The hook to call.  Null is safe.
181     */
182    public void addLocalLinkHook( final StringTransmutator mutator ) {
183        addLinkHook( m_localLinkMutatorChain, mutator );
184    }
185
186    /**
187     *  Adds a hook for processing external links.  This includes all http:// ftp://, etc. links, including inlined images.
188     *
189     *  @param mutator The hook to call.  Null is safe.
190     */
191    public void addExternalLinkHook( final StringTransmutator mutator ) {
192        addLinkHook( m_externalLinkMutatorChain, mutator );
193    }
194
195    /**
196     *  Adds a hook for processing attachment links.
197     *
198     *  @param mutator The hook to call.  Null is safe.
199     */
200    public void addAttachmentLinkHook( final StringTransmutator mutator ) {
201        addLinkHook( m_attachmentLinkMutatorChain, mutator );
202    }
203
204    void addLinkHook( final List< StringTransmutator > mutatorChain, final StringTransmutator mutator ) {
205        if( mutator != null ) {
206            mutatorChain.add( mutator );
207        }
208    }
209
210    /**
211     *  Adds a HeadingListener to the parser chain.  It will be called whenever a parsed header is found.
212     *
213     *  @param listener The listener to add.
214     */
215    public void addHeadingListener( final HeadingListener listener ) {
216        if( listener != null ) {
217            m_headingListenerChain.add( listener );
218        }
219    }
220
221    /**
222     *  Disables access rule parsing.
223     */
224    public void disableAccessRules()
225    {
226        m_parseAccessRules = false;
227    }
228
229    public boolean isParseAccessRules()
230    {
231        return m_parseAccessRules;
232    }
233
234    /**
235     *  Use this to turn on or off image inlining.
236     *
237     *  @param toggle If true, images are inlined (as per set in jspwiki.properties)
238     *                If false, then images won't be inlined; instead, they will be
239     *                treated as standard hyperlinks.
240     *  @since 2.2.9
241     */
242    public void enableImageInlining( final boolean toggle )
243    {
244        m_inlineImages = toggle;
245    }
246
247    public boolean isImageInlining() {
248        return m_inlineImages;
249    }
250
251    protected final void initInlineImagePatterns() {
252        final PatternCompiler compiler = new GlobCompiler();
253
254        //  We cache compiled patterns in the engine, since their creation is really expensive
255        List< Pattern > compiledpatterns = m_engine.getAttribute( INLINE_IMAGE_PATTERNS );
256
257        if( compiledpatterns == null ) {
258            compiledpatterns = new ArrayList< >( 20 );
259            final Collection< String > ptrns = m_engine.getAllInlinedImagePatterns();
260
261            //  Make them into Regexp Patterns.  Unknown patterns are ignored.
262            for( final String pattern : ptrns ) {
263                try {
264                    compiledpatterns.add( compiler.compile( pattern, GlobCompiler.DEFAULT_MASK | GlobCompiler.READ_ONLY_MASK ) );
265                } catch( final MalformedPatternException e ) {
266                    log.error( "Malformed pattern [" + pattern + "] in properties: ", e );
267                }
268            }
269
270            m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns );
271        }
272
273        m_inlineImagePatterns = Collections.unmodifiableList( compiledpatterns );
274    }
275
276    public List< Pattern > getInlineImagePatterns() {
277        if( m_inlineImagePatterns == null ) {
278            initInlineImagePatterns();
279        }
280        return m_inlineImagePatterns;
281    }
282
283    /**
284     *  Parses the document.
285     *
286     *  @return the parsed document, as a WikiDocument
287     *  @throws IOException If something goes wrong.
288     */
289    public abstract WikiDocument parse() throws IOException;
290
291    /**
292     *  Return the current position in the reader stream. The value will be -1 prior to reading.
293     *
294     * @return the reader position as an int.
295     */
296    public int getPosition()
297    {
298        return m_pos;
299    }
300
301    /**
302     * Returns the next token in the stream.  This is the most called method in the entire parser, so it needs to be lean and mean.
303     *
304     * @return The next token in the stream; or, if the stream is ended, -1.
305     * @throws IOException If something bad happens
306     * @throws NullPointerException If you have not yet created an input document.
307     */
308    protected final int nextToken() throws IOException, NullPointerException {
309        // if( m_in == null ) return -1;
310        m_pos++;
311        return m_in.read();
312    }
313
314    /**
315     *  Push back any character to the current input.  Does not push back a read EOF, though.
316     *
317     *  @param c Character to push back.
318     *  @throws IOException In case the character cannot be pushed back.
319     */
320    protected void pushBack( final int c ) throws IOException {
321        if( c != -1 && m_in != null ) {
322            m_pos--;
323            m_in.unread( c );
324        }
325    }
326
327    /**
328     *  Writes HTML for error message.  Does not add it to the document, you have to do it yourself.
329     *
330     *  @param error The error string.
331     *  @return An Element containing the error.
332     */
333    public static Element makeError( final String error ) {
334        return new Element( "span" ).setAttribute( "class", "error" ).addContent( error );
335    }
336
337    /**
338     *  Cleans a Wiki name.  The functionality of this method was changed in 2.6 so that the list of allowed characters is much larger.
339     *  Use {@link #wikifyLink(String)} to get the legacy behaviour.
340     *  <P>
341     *  [ This is a link ] -&gt; This is a link
342     *
343     *  @param link Link to be cleared. Null is safe, and causes this to return null.
344     *  @return A cleaned link.
345     *
346     *  @since 2.0
347     */
348    public static String cleanLink( final String link ) {
349        return TextUtil.cleanString( link, TextUtil.PUNCTUATION_CHARS_ALLOWED );
350    }
351
352    /**
353     *  Cleans away extra legacy characters.  This method functions exactly like pre-2.6 cleanLink()
354     *  <P>
355     *  [ This is a link ] -&gt; ThisIsALink
356     *
357     *  @param link Link to be cleared. Null is safe, and causes this to return null.
358     *  @return A cleaned link.
359     *  @since 2.6
360     */
361    public static String wikifyLink( final String link ) {
362        return TextUtil.cleanString( link, TextUtil.LEGACY_CHARS_ALLOWED );
363    }
364
365}