001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018*/
019package org.apache.wiki.parser;
020
021import org.apache.log4j.Logger;
022import org.apache.oro.text.GlobCompiler;
023import org.apache.oro.text.regex.MalformedPatternException;
024import org.apache.oro.text.regex.Pattern;
025import org.apache.oro.text.regex.PatternCompiler;
026import org.apache.wiki.StringTransmutator;
027import org.apache.wiki.api.core.Context;
028import org.apache.wiki.api.core.Engine;
029import org.apache.wiki.util.TextUtil;
030import org.jdom2.Element;
031
032import java.io.BufferedReader;
033import java.io.IOException;
034import java.io.PushbackReader;
035import java.io.Reader;
036import java.util.ArrayList;
037import java.util.Collection;
038import java.util.Collections;
039import java.util.List;
040
041/**
042 *   Provides an abstract class for the parser instances.
043 *
044 *   @since  2.4
045 */
046public abstract class MarkupParser {
047
048    /** Allow this many characters to be pushed back in the stream.  In effect, this limits the size of a single line.  */
049    protected static final int PUSHBACK_BUFFER_SIZE = 10*1024;
050    protected PushbackReader m_in;
051    private int m_pos = -1; // current position in reader stream
052
053    protected Engine m_engine;
054    protected Context m_context;
055
056    /** Optionally stores internal wikilinks */
057    protected ArrayList< StringTransmutator > m_localLinkMutatorChain = new ArrayList<>();
058    protected ArrayList< StringTransmutator > m_externalLinkMutatorChain = new ArrayList<>();
059    protected ArrayList< StringTransmutator > m_attachmentLinkMutatorChain = new ArrayList<>();
060    protected ArrayList< StringTransmutator > m_linkMutators = new ArrayList<>();
061    protected ArrayList< HeadingListener > m_headingListenerChain = new ArrayList<>();
062
063    protected boolean m_inlineImages = true;
064    protected boolean m_parseAccessRules = true;
065    /** Keeps image regexp Patterns */
066    protected List< Pattern > m_inlineImagePatterns = null;
067    protected LinkParsingOperations m_linkParsingOperations;
068
069    private static final Logger log = Logger.getLogger( MarkupParser.class );
070
071    /** If set to "true", allows using raw HTML within Wiki text.  Be warned, this is a VERY dangerous option to set -
072       never turn this on in a publicly allowable Wiki, unless you are absolutely certain of what you're doing. */
073    public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";
074
075    /** If set to "true", enables plugins during parsing */
076    public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";
077
078    /** If true, all outward links (external links) have a small link image appended. */
079    public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";
080
081    /** If set to "true", all external links are tagged with 'rel="nofollow"' */
082    public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";
083
084    /** If true, consider CamelCase hyperlinks as well. */
085    public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";
086
087    /** If true, all hyperlinks are translated as well, regardless whether they
088     are surrounded by brackets. */
089    public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";
090
091    /** If true, all outward attachment info links have a small link image appended. */
092    public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";
093
094    public static final String HASHLINK = "hashlink";
095
096    /** Name of the outlink image; relative path to the JSPWiki directory. */
097    public static final String OUTLINK_IMAGE = "images/out.png";
098    /** Outlink css class. */
099    public static final String OUTLINK = "outlink";
100
101    private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";
102
103    /** The value for anchor element <tt>class</tt> attributes when used for wiki page (normal) links. The value is "wikipage". */
104   public static final String CLASS_WIKIPAGE = "wikipage";
105
106   /** The value for anchor element <tt>class</tt> attributes when used for edit page links. The value is "createpage". */
107   public static final String CLASS_EDITPAGE = "createpage";
108
109   /** The value for anchor element <tt>class</tt> attributes when used for interwiki page links. The value is "interwiki". */
110   public static final String CLASS_INTERWIKI = "interwiki";
111
112   /** The value for anchor element <tt>class</tt> attributes when used for footnote links. The value is "footnote". */
113   public static final String CLASS_FOOTNOTE = "footnote";
114
115   /** The value for anchor element <tt>class</tt> attributes when used for footnote links. The value is "footnote". */
116   public static final String CLASS_FOOTNOTE_REF = "footnoteref";
117
118   /** The value for anchor element <tt>class</tt> attributes when used for external links. The value is "external". */
119   public static final String CLASS_EXTERNAL = "external";
120
121   /** The value for anchor element <tt>class</tt> attributes when used for attachments. The value is "attachment". */
122   public static final String CLASS_ATTACHMENT = "attachment";
123
124   public static final String[] CLASS_TYPES = {
125      CLASS_WIKIPAGE,
126      CLASS_EDITPAGE,
127      "",
128      CLASS_FOOTNOTE,
129      CLASS_FOOTNOTE_REF,
130      "",
131      CLASS_EXTERNAL,
132      CLASS_INTERWIKI,
133      CLASS_EXTERNAL,
134      CLASS_WIKIPAGE,
135      CLASS_ATTACHMENT
136   };
137
138    /**
139     *  Constructs a MarkupParser.  The subclass must call this constructor to set up the necessary bits and pieces.
140     *
141     *  @param context The WikiContext.
142     *  @param in The reader from which we are reading the bytes from.
143     */
144    protected MarkupParser( final Context context, final Reader in ) {
145        m_engine = context.getEngine();
146        m_context = context;
147        m_linkParsingOperations = new LinkParsingOperations( m_context );
148        setInputReader( in );
149    }
150
151    /**
152     *  Replaces the current input character stream with a new one.
153     *
154     *  @param in New source for input.  If null, this method does nothing.
155     *  @return the old stream
156     */
157    public Reader setInputReader( final Reader in ) {
158        final Reader old = m_in;
159        if( in != null ) {
160            m_in = new PushbackReader( new BufferedReader( in ), PUSHBACK_BUFFER_SIZE );
161        }
162
163        return old;
164    }
165
166    /**
167     *  Adds a hook for processing link texts.  This hook is called when the link text is written into the output stream, and
168     *  you may use it to modify the text.  It does not affect the actual link, only the user-visible text.
169     *
170     *  @param mutator The hook to call.  Null is safe.
171     */
172    public void addLinkTransmutator( final StringTransmutator mutator ) {
173        addLinkHook( m_linkMutators, mutator );
174    }
175
176    /**
177     *  Adds a hook for processing local links.  The engine transforms both non-existing and existing page links.
178     *
179     *  @param mutator The hook to call.  Null is safe.
180     */
181    public void addLocalLinkHook( final StringTransmutator mutator ) {
182        addLinkHook( m_localLinkMutatorChain, mutator );
183    }
184
185    /**
186     *  Adds a hook for processing external links.  This includes all http:// ftp://, etc. links, including inlined images.
187     *
188     *  @param mutator The hook to call.  Null is safe.
189     */
190    public void addExternalLinkHook( final StringTransmutator mutator ) {
191        addLinkHook( m_externalLinkMutatorChain, mutator );
192    }
193
194    /**
195     *  Adds a hook for processing attachment links.
196     *
197     *  @param mutator The hook to call.  Null is safe.
198     */
199    public void addAttachmentLinkHook( final StringTransmutator mutator ) {
200        addLinkHook( m_attachmentLinkMutatorChain, mutator );
201    }
202
203    void addLinkHook( final List< StringTransmutator > mutatorChain, final StringTransmutator mutator ) {
204        if( mutator != null ) {
205            mutatorChain.add( mutator );
206        }
207    }
208
209    /**
210     *  Adds a HeadingListener to the parser chain.  It will be called whenever a parsed header is found.
211     *
212     *  @param listener The listener to add.
213     */
214    public void addHeadingListener( final HeadingListener listener ) {
215        if( listener != null ) {
216            m_headingListenerChain.add( listener );
217        }
218    }
219
220    /**
221     *  Disables access rule parsing.
222     */
223    public void disableAccessRules()
224    {
225        m_parseAccessRules = false;
226    }
227
228    public boolean isParseAccessRules()
229    {
230        return m_parseAccessRules;
231    }
232
233    /**
234     *  Use this to turn on or off image inlining.
235     *
236     *  @param toggle If true, images are inlined (as per set in jspwiki.properties)
237     *                If false, then images won't be inlined; instead, they will be
238     *                treated as standard hyperlinks.
239     *  @since 2.2.9
240     */
241    public void enableImageInlining( final boolean toggle )
242    {
243        m_inlineImages = toggle;
244    }
245
246    public boolean isImageInlining() {
247        return m_inlineImages;
248    }
249
250    protected final void initInlineImagePatterns() {
251        final PatternCompiler compiler = new GlobCompiler();
252
253        //  We cache compiled patterns in the engine, since their creation is really expensive
254        List< Pattern > compiledpatterns = m_engine.getAttribute( INLINE_IMAGE_PATTERNS );
255
256        if( compiledpatterns == null ) {
257            compiledpatterns = new ArrayList< >( 20 );
258            final Collection< String > ptrns = m_engine.getAllInlinedImagePatterns();
259
260            //  Make them into Regexp Patterns.  Unknown patterns are ignored.
261            for( final String pattern : ptrns ) {
262                try {
263                    compiledpatterns.add( compiler.compile( pattern, GlobCompiler.DEFAULT_MASK | GlobCompiler.READ_ONLY_MASK ) );
264                } catch( final MalformedPatternException e ) {
265                    log.error( "Malformed pattern [" + pattern + "] in properties: ", e );
266                }
267            }
268
269            m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns );
270        }
271
272        m_inlineImagePatterns = Collections.unmodifiableList( compiledpatterns );
273    }
274
275    public List< Pattern > getInlineImagePatterns() {
276        if( m_inlineImagePatterns == null ) {
277            initInlineImagePatterns();
278        }
279        return m_inlineImagePatterns;
280    }
281
282    /**
283     *  Parses the document.
284     *
285     *  @return the parsed document, as a WikiDocument
286     *  @throws IOException If something goes wrong.
287     */
288    public abstract WikiDocument parse() throws IOException;
289
290    /**
291     *  Return the current position in the reader stream. The value will be -1 prior to reading.
292     *
293     * @return the reader position as an int.
294     */
295    public int getPosition()
296    {
297        return m_pos;
298    }
299
300    /**
301     * Returns the next token in the stream.  This is the most called method in the entire parser, so it needs to be lean and mean.
302     *
303     * @return The next token in the stream; or, if the stream is ended, -1.
304     * @throws IOException If something bad happens
305     * @throws NullPointerException If you have not yet created an input document.
306     */
307    protected final int nextToken() throws IOException, NullPointerException {
308        // if( m_in == null ) return -1;
309        m_pos++;
310        return m_in.read();
311    }
312
313    /**
314     *  Push back any character to the current input.  Does not push back a read EOF, though.
315     *
316     *  @param c Character to push back.
317     *  @throws IOException In case the character cannot be pushed back.
318     */
319    protected void pushBack( final int c ) throws IOException {
320        if( c != -1 && m_in != null ) {
321            m_pos--;
322            m_in.unread( c );
323        }
324    }
325
326    /**
327     *  Writes HTML for error message.  Does not add it to the document, you have to do it yourself.
328     *
329     *  @param error The error string.
330     *  @return An Element containing the error.
331     */
332    public static Element makeError( final String error ) {
333        return new Element( "span" ).setAttribute( "class", "error" ).addContent( error );
334    }
335
336    /**
337     *  Cleans a Wiki name.  The functionality of this method was changed in 2.6 so that the list of allowed characters is much larger.
338     *  Use {@link #wikifyLink(String)} to get the legacy behaviour.
339     *  <P>
340     *  [ This is a link ] -&gt; This is a link
341     *
342     *  @param link Link to be cleared. Null is safe, and causes this to return null.
343     *  @return A cleaned link.
344     *
345     *  @since 2.0
346     */
347    public static String cleanLink( final String link ) {
348        return TextUtil.cleanString( link, TextUtil.PUNCTUATION_CHARS_ALLOWED );
349    }
350
351    /**
352     *  Cleans away extra legacy characters.  This method functions exactly like pre-2.6 cleanLink()
353     *  <P>
354     *  [ This is a link ] -&gt; ThisIsALink
355     *
356     *  @param link Link to be cleared. Null is safe, and causes this to return null.
357     *  @return A cleaned link.
358     *  @since 2.6
359     */
360    public static String wikifyLink( final String link ) {
361        return TextUtil.cleanString( link, TextUtil.LEGACY_CHARS_ALLOWED );
362    }
363
364}