001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.  
018*/
019package org.apache.wiki.parser;
020
021import java.io.BufferedReader;
022import java.io.IOException;
023import java.io.PushbackReader;
024import java.io.Reader;
025import java.util.ArrayList;
026
027import org.apache.wiki.StringTransmutator;
028import org.apache.wiki.WikiContext;
029import org.apache.wiki.WikiEngine;
030import org.jdom2.Element;
031
032/**
033 *   Provides an abstract class for the parser instances.
034 *
035 *   @since  2.4
036 */
037public abstract class MarkupParser
038{
039    /** Allow this many characters to be pushed back in the stream.  In effect,
040        this limits the size of a single line.  */
041    protected static final int              PUSHBACK_BUFFER_SIZE = 10*1024;
042    protected PushbackReader                m_in;
043    private int              m_pos = -1; // current position in reader stream
044
045    protected WikiEngine     m_engine;
046    protected WikiContext    m_context;
047
048    /** Optionally stores internal wikilinks */
049    protected ArrayList<StringTransmutator>      m_localLinkMutatorChain    = new ArrayList<StringTransmutator>();
050    protected ArrayList<StringTransmutator>      m_externalLinkMutatorChain = new ArrayList<StringTransmutator>();
051    protected ArrayList<StringTransmutator>      m_attachmentLinkMutatorChain = new ArrayList<StringTransmutator>();
052    protected ArrayList<HeadingListener>         m_headingListenerChain     = new ArrayList<HeadingListener>();
053    protected ArrayList<StringTransmutator>      m_linkMutators             = new ArrayList<StringTransmutator>();
054
055    protected boolean        m_inlineImages             = true;
056
057    protected boolean        m_parseAccessRules = true;
058    /** If set to "true", allows using raw HTML within Wiki text.  Be warned,
059        this is a VERY dangerous option to set - never turn this on in a publicly
060        allowable Wiki, unless you are absolutely certain of what you're doing. */
061    public static final String     PROP_ALLOWHTML        = "jspwiki.translatorReader.allowHTML";
062    /** If set to "true", enables plugins during parsing */
063    public static final String     PROP_RUNPLUGINS       = "jspwiki.translatorReader.runPlugins";
064
065    /** Lists all punctuation characters allowed in WikiMarkup. These
066        will not be cleaned away. This is for compatibility for older versions
067        of JSPWiki. */
068
069    protected static final String           LEGACY_CHARS_ALLOWED      = "._";
070
071    /** Lists all punctuation characters allowed in page names. */
072    public    static final String           PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$";
073
074    /**
075     *  Constructs a MarkupParser.  The subclass must call this constructor
076     *  to set up the necessary bits and pieces.
077     *  
078     *  @param context The WikiContext.
079     *  @param in The reader from which we are reading the bytes from.
080     */
081    protected MarkupParser( WikiContext context, Reader in )
082    {
083        m_engine = context.getEngine();
084        m_context = context;
085        setInputReader( in );
086    }
087
088    /**
089     *  Replaces the current input character stream with a new one.
090     *  @param in New source for input.  If null, this method does nothing.
091     *  @return the old stream
092     */
093    public Reader setInputReader( Reader in )
094    {
095        Reader old = m_in;
096
097        if( in != null )
098        {
099            m_in = new PushbackReader( new BufferedReader( in ),
100                                       PUSHBACK_BUFFER_SIZE );
101        }
102
103        return old;
104    }
105
106    /**
107     *  Adds a hook for processing link texts.  This hook is called
108     *  when the link text is written into the output stream, and
109     *  you may use it to modify the text.  It does not affect the
110     *  actual link, only the user-visible text.
111     *
112     *  @param mutator The hook to call.  Null is safe.
113     */
114    public void addLinkTransmutator( StringTransmutator mutator )
115    {
116        if( mutator != null )
117        {
118            m_linkMutators.add( mutator );
119        }
120    }
121
122    /**
123     *  Adds a hook for processing local links.  The engine
124     *  transforms both non-existing and existing page links.
125     *
126     *  @param mutator The hook to call.  Null is safe.
127     */
128    public void addLocalLinkHook( StringTransmutator mutator )
129    {
130        if( mutator != null )
131        {
132            m_localLinkMutatorChain.add( mutator );
133        }
134    }
135
136    /**
137     *  Adds a hook for processing external links.  This includes
138     *  all http:// ftp://, etc. links, including inlined images.
139     *
140     *  @param mutator The hook to call.  Null is safe.
141     */
142    public void addExternalLinkHook( StringTransmutator mutator )
143    {
144        if( mutator != null )
145        {
146            m_externalLinkMutatorChain.add( mutator );
147        }
148    }
149
150    /**
151     *  Adds a hook for processing attachment links.
152     *
153     *  @param mutator The hook to call.  Null is safe.
154     */
155    public void addAttachmentLinkHook( StringTransmutator mutator )
156    {
157        if( mutator != null )
158        {
159            m_attachmentLinkMutatorChain.add( mutator );
160        }
161    }
162
163    /**
164     *  Adds a HeadingListener to the parser chain.  It will be called whenever
165     *  a parsed header is found.
166     *  
167     *  @param listener The listener to add.
168     */
169    public void addHeadingListener( HeadingListener listener )
170    {
171        if( listener != null )
172        {
173            m_headingListenerChain.add( listener );
174        }
175    }
176
177    /**
178     *  Disables access rule parsing.
179     */
180    public void disableAccessRules()
181    {
182        m_parseAccessRules = false;
183    }
184
185    /**
186     *  Use this to turn on or off image inlining.
187     *  @param toggle If true, images are inlined (as per set in jspwiki.properties)
188     *                If false, then images won't be inlined; instead, they will be
189     *                treated as standard hyperlinks.
190     *  @since 2.2.9
191     */
192    public void enableImageInlining( boolean toggle )
193    {
194        m_inlineImages = toggle;
195    }
196
197    /**
198     *  Parses the document.
199     *  @return the parsed document, as a WikiDocument
200     *  @throws IOException If something goes wrong.
201     */
202    public abstract WikiDocument parse()
203         throws IOException;
204
205    /**
206     *  Return the current position in the reader stream.
207     *  The value will be -1 prior to reading.
208     * @return the reader position as an int.
209     */
210    public int getPosition()
211    {
212        return m_pos;
213    }
214
215    /**
216     * Returns the next token in the stream.  This is the most called method
217     * in the entire parser, so it needs to be lean and mean.
218     *
219     * @return The next token in the stream; or, if the stream is ended, -1.
220     * @throws IOException If something bad happens
221     * @throws NullPointerException If you have not yet created an input document.
222     */
223    protected final int nextToken()
224        throws IOException, NullPointerException
225    {
226        // if( m_in == null ) return -1;
227        m_pos++;
228        return m_in.read();
229    }
230
231    /**
232     *  Push back any character to the current input.  Does not
233     *  push back a read EOF, though.
234     *  
235     *  @param c Character to push back.
236     *  @throws IOException In case the character cannot be pushed back.
237     */
238    protected void pushBack( int c )
239        throws IOException
240    {
241        if( c != -1 && m_in != null )
242        {
243            m_pos--;
244            m_in.unread( c );
245        }
246    }
247    
248    /**
249     *  Writes HTML for error message.  Does not add it to the document, you
250     *  have to do it yourself.
251     *
252     *  @param error The error string.
253     *  @return An Element containing the error.
254     */
255
256    public static Element makeError( String error )
257    {
258        return new Element("span").setAttribute("class","error").addContent(error);
259    }
260
261    /**
262     *  Cleans a Wiki name.  The functionality of this method was changed in 2.6
263     *  so that the list of allowed characters is much larger.  Use wikifyLink()
264     *  to get the legacy behaviour.
265     *  <P>
266     *  [ This is a link ] -&gt; This is a link
267     *
268     *  @param link Link to be cleared. Null is safe, and causes this to return null.
269     *  @return A cleaned link.
270     *
271     *  @since 2.0
272     */
273    public static String cleanLink( String link )
274    {
275        return cleanLink(link, PUNCTUATION_CHARS_ALLOWED);
276    }
277
278    /**
279     *  Cleans a Wiki name based on a list of characters.  Also, any multiple
280     *  whitespace is collapsed into a single space, and any leading or trailing
281     *  space is removed.
282     *
283     *  @param link Link to be cleared. Null is safe, and causes this to return null.
284     *  @param allowedChars Characters which are allowed in the string.
285     *  @return A cleaned link.
286     *
287     *  @since 2.6
288     */
289    public static String cleanLink( String link, String allowedChars )
290    {
291        if( link == null ) return null;
292
293        link = link.trim();
294        StringBuilder clean = new StringBuilder(link.length());
295
296        //
297        //  Remove non-alphanumeric characters that should not
298        //  be put inside WikiNames.  Note that all valid
299        //  Unicode letters are considered okay for WikiNames.
300        //  It is the problem of the WikiPageProvider to take
301        //  care of actually storing that information.
302        //
303        //  Also capitalize things, if necessary.
304        //
305
306        boolean isWord = true;  // If true, we've just crossed a word boundary
307        boolean wasSpace = false;
308
309        for( int i = 0; i < link.length(); i++ )
310        {
311            char ch = link.charAt(i);
312
313            //
314            //  Cleans away repetitive whitespace and only uses the first one.
315            //
316            if( Character.isWhitespace(ch) )
317            {
318                if( wasSpace )
319                    continue;
320
321                wasSpace = true;
322            }
323            else
324            {
325                wasSpace = false;
326            }
327
328            //
329            //  Check if it is allowed to use this char, and capitalize, if necessary.
330            //
331            if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf(ch) != -1 )
332            {
333                // Is a letter
334
335                if( isWord ) ch = Character.toUpperCase( ch );
336                clean.append( ch );
337                isWord = false;
338            }
339            else
340            {
341                isWord = true;
342            }
343        }
344
345        return clean.toString();
346    }
347
348    /**
349     *  Cleans away extra legacy characters.  This method functions exactly
350     *  like pre-2.6 cleanLink()
351     *  <P>
352     *  [ This is a link ] -&gt; ThisIsALink
353     *
354     *  @param link Link to be cleared. Null is safe, and causes this to return null.
355     *  @return A cleaned link.
356     *  @since 2.6
357     */
358    public static String wikifyLink(String link)
359    {
360        return MarkupParser.cleanLink(link, MarkupParser.LEGACY_CHARS_ALLOWED);
361    }
362
363}