001    /*
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.  
018    */
019    package org.apache.wiki.parser;
020    
021    import java.io.BufferedReader;
022    import java.io.IOException;
023    import java.io.PushbackReader;
024    import java.io.Reader;
025    import java.util.ArrayList;
026    
027    import org.apache.wiki.StringTransmutator;
028    import org.apache.wiki.WikiContext;
029    import org.apache.wiki.WikiEngine;
030    
031    /**
032     *   Provides an abstract class for the parser instances.
033     *
034     *   @since  2.4
035     */
036    public abstract class MarkupParser
037    {
038        /** Allow this many characters to be pushed back in the stream.  In effect,
039            this limits the size of a single line.  */
040        protected static final int              PUSHBACK_BUFFER_SIZE = 10*1024;
041        protected PushbackReader                m_in;
042        private int              m_pos = -1; // current position in reader stream
043    
044        protected WikiEngine     m_engine;
045        protected WikiContext    m_context;
046    
047        /** Optionally stores internal wikilinks */
048        protected ArrayList<StringTransmutator>      m_localLinkMutatorChain    = new ArrayList<StringTransmutator>();
049        protected ArrayList<StringTransmutator>      m_externalLinkMutatorChain = new ArrayList<StringTransmutator>();
050        protected ArrayList<StringTransmutator>      m_attachmentLinkMutatorChain = new ArrayList<StringTransmutator>();
051        protected ArrayList<HeadingListener>         m_headingListenerChain     = new ArrayList<HeadingListener>();
052        protected ArrayList<StringTransmutator>      m_linkMutators             = new ArrayList<StringTransmutator>();
053    
054        protected boolean        m_inlineImages             = true;
055    
056        protected boolean        m_parseAccessRules = true;
057        /** If set to "true", allows using raw HTML within Wiki text.  Be warned,
058            this is a VERY dangerous option to set - never turn this on in a publicly
059            allowable Wiki, unless you are absolutely certain of what you're doing. */
060        public static final String     PROP_ALLOWHTML        = "jspwiki.translatorReader.allowHTML";
061        /** If set to "true", enables plugins during parsing */
062        public static final String     PROP_RUNPLUGINS       = "jspwiki.translatorReader.runPlugins";
063    
064        /** Lists all punctuation characters allowed in WikiMarkup. These
065            will not be cleaned away. This is for compatibility for older versions
066            of JSPWiki. */
067    
068        protected static final String           LEGACY_CHARS_ALLOWED      = "._";
069    
070        /** Lists all punctuation characters allowed in page names. */
071        public    static final String           PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$";
072    
073        /**
074         *  Constructs a MarkupParser.  The subclass must call this constructor
075         *  to set up the necessary bits and pieces.
076         *  
077         *  @param context The WikiContext.
078         *  @param in The reader from which we are reading the bytes from.
079         */
080        protected MarkupParser( WikiContext context, Reader in )
081        {
082            m_engine = context.getEngine();
083            m_context = context;
084            setInputReader( in );
085        }
086    
087        /**
088         *  Replaces the current input character stream with a new one.
089         *  @param in New source for input.  If null, this method does nothing.
090         *  @return the old stream
091         */
092        public Reader setInputReader( Reader in )
093        {
094            Reader old = m_in;
095    
096            if( in != null )
097            {
098                m_in = new PushbackReader( new BufferedReader( in ),
099                                           PUSHBACK_BUFFER_SIZE );
100            }
101    
102            return old;
103        }
104    
105        /**
106         *  Adds a hook for processing link texts.  This hook is called
107         *  when the link text is written into the output stream, and
108         *  you may use it to modify the text.  It does not affect the
109         *  actual link, only the user-visible text.
110         *
111         *  @param mutator The hook to call.  Null is safe.
112         */
113        public void addLinkTransmutator( StringTransmutator mutator )
114        {
115            if( mutator != null )
116            {
117                m_linkMutators.add( mutator );
118            }
119        }
120    
121        /**
122         *  Adds a hook for processing local links.  The engine
123         *  transforms both non-existing and existing page links.
124         *
125         *  @param mutator The hook to call.  Null is safe.
126         */
127        public void addLocalLinkHook( StringTransmutator mutator )
128        {
129            if( mutator != null )
130            {
131                m_localLinkMutatorChain.add( mutator );
132            }
133        }
134    
135        /**
136         *  Adds a hook for processing external links.  This includes
137         *  all http:// ftp://, etc. links, including inlined images.
138         *
139         *  @param mutator The hook to call.  Null is safe.
140         */
141        public void addExternalLinkHook( StringTransmutator mutator )
142        {
143            if( mutator != null )
144            {
145                m_externalLinkMutatorChain.add( mutator );
146            }
147        }
148    
149        /**
150         *  Adds a hook for processing attachment links.
151         *
152         *  @param mutator The hook to call.  Null is safe.
153         */
154        public void addAttachmentLinkHook( StringTransmutator mutator )
155        {
156            if( mutator != null )
157            {
158                m_attachmentLinkMutatorChain.add( mutator );
159            }
160        }
161    
162        /**
163         *  Adds a HeadingListener to the parser chain.  It will be called whenever
164         *  a parsed header is found.
165         *  
166         *  @param listener The listener to add.
167         */
168        public void addHeadingListener( HeadingListener listener )
169        {
170            if( listener != null )
171            {
172                m_headingListenerChain.add( listener );
173            }
174        }
175    
176        /**
177         *  Disables access rule parsing.
178         */
179        public void disableAccessRules()
180        {
181            m_parseAccessRules = false;
182        }
183    
184        /**
185         *  Use this to turn on or off image inlining.
186         *  @param toggle If true, images are inlined (as per set in jspwiki.properties)
187         *                If false, then images won't be inlined; instead, they will be
188         *                treated as standard hyperlinks.
189         *  @since 2.2.9
190         */
191        public void enableImageInlining( boolean toggle )
192        {
193            m_inlineImages = toggle;
194        }
195    
196        /**
197         *  Parses the document.
198         *  @return the parsed document, as a WikiDocument
199         *  @throws IOException If something goes wrong.
200         */
201        public abstract WikiDocument parse()
202             throws IOException;
203    
204        /**
205         *  Return the current position in the reader stream.
206         *  The value will be -1 prior to reading.
207         * @return the reader position as an int.
208         */
209        public int getPosition()
210        {
211            return m_pos;
212        }
213    
214        /**
215         * Returns the next token in the stream.  This is the most called method
216         * in the entire parser, so it needs to be lean and mean.
217         *
218         * @return The next token in the stream; or, if the stream is ended, -1.
219         * @throws IOException If something bad happens
220         * @throws NullPointerException If you have not yet created an input document.
221         */
222        protected final int nextToken()
223            throws IOException, NullPointerException
224        {
225            // if( m_in == null ) return -1;
226            m_pos++;
227            return m_in.read();
228        }
229    
230        /**
231         *  Push back any character to the current input.  Does not
232         *  push back a read EOF, though.
233         *  
234         *  @param c Character to push back.
235         *  @throws IOException In case the character cannot be pushed back.
236         */
237        protected void pushBack( int c )
238            throws IOException
239        {
240            if( c != -1 && m_in != null )
241            {
242                m_pos--;
243                m_in.unread( c );
244            }
245        }
246    
247        /**
248         *  Cleans a Wiki name.  The functionality of this method was changed in 2.6
249         *  so that the list of allowed characters is much larger.  Use wikifyLink()
250         *  to get the legacy behaviour.
251         *  <P>
252         *  [ This is a link ] -&gt; This is a link
253         *
254         *  @param link Link to be cleared. Null is safe, and causes this to return null.
255         *  @return A cleaned link.
256         *
257         *  @since 2.0
258         */
259        public static String cleanLink( String link )
260        {
261            return cleanLink(link, PUNCTUATION_CHARS_ALLOWED);
262        }
263    
264        /**
265         *  Cleans a Wiki name based on a list of characters.  Also, any multiple
266         *  whitespace is collapsed into a single space, and any leading or trailing
267         *  space is removed.
268         *
269         *  @param link Link to be cleared. Null is safe, and causes this to return null.
270         *  @param allowedChars Characters which are allowed in the string.
271         *  @return A cleaned link.
272         *
273         *  @since 2.6
274         */
275        public static String cleanLink( String link, String allowedChars )
276        {
277            if( link == null ) return null;
278    
279            link = link.trim();
280            StringBuffer clean = new StringBuffer(link.length());
281    
282            //
283            //  Remove non-alphanumeric characters that should not
284            //  be put inside WikiNames.  Note that all valid
285            //  Unicode letters are considered okay for WikiNames.
286            //  It is the problem of the WikiPageProvider to take
287            //  care of actually storing that information.
288            //
289            //  Also capitalize things, if necessary.
290            //
291    
292            boolean isWord = true;  // If true, we've just crossed a word boundary
293            boolean wasSpace = false;
294    
295            for( int i = 0; i < link.length(); i++ )
296            {
297                char ch = link.charAt(i);
298    
299                //
300                //  Cleans away repetitive whitespace and only uses the first one.
301                //
302                if( Character.isWhitespace(ch) )
303                {
304                    if( wasSpace )
305                        continue;
306    
307                    wasSpace = true;
308                }
309                else
310                {
311                    wasSpace = false;
312                }
313    
314                //
315                //  Check if it is allowed to use this char, and capitalize, if necessary.
316                //
317                if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf(ch) != -1 )
318                {
319                    // Is a letter
320    
321                    if( isWord ) ch = Character.toUpperCase( ch );
322                    clean.append( ch );
323                    isWord = false;
324                }
325                else
326                {
327                    isWord = true;
328                }
329            }
330    
331            return clean.toString();
332        }
333    
334        /**
335         *  Cleans away extra legacy characters.  This method functions exactly
336         *  like pre-2.6 cleanLink()
337         *  <P>
338         *  [ This is a link ] -&gt; ThisIsALink
339         *
340         *  @param link Link to be cleared. Null is safe, and causes this to return null.
341         *  @return A cleaned link.
342         *  @since 2.6
343         */
344        public static String wikifyLink(String link)
345        {
346            return MarkupParser.cleanLink(link, MarkupParser.LEGACY_CHARS_ALLOWED);
347        }
348    
349    }