001 /*
002 Licensed to the Apache Software Foundation (ASF) under one
003 or more contributor license agreements. See the NOTICE file
004 distributed with this work for additional information
005 regarding copyright ownership. The ASF licenses this file
006 to you under the Apache License, Version 2.0 (the
007 "License"); you may not use this file except in compliance
008 with the License. You may obtain a copy of the License at
009
010 http://www.apache.org/licenses/LICENSE-2.0
011
012 Unless required by applicable law or agreed to in writing,
013 software distributed under the License is distributed on an
014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 KIND, either express or implied. See the License for the
016 specific language governing permissions and limitations
017 under the License.
018 */
019 package org.apache.wiki.parser;
020
021 import java.io.BufferedReader;
022 import java.io.IOException;
023 import java.io.PushbackReader;
024 import java.io.Reader;
025 import java.util.ArrayList;
026
027 import org.apache.wiki.StringTransmutator;
028 import org.apache.wiki.WikiContext;
029 import org.apache.wiki.WikiEngine;
030
031 /**
032 * Provides an abstract class for the parser instances.
033 *
034 * @since 2.4
035 */
036 public abstract class MarkupParser
037 {
038 /** Allow this many characters to be pushed back in the stream. In effect,
039 this limits the size of a single line. */
040 protected static final int PUSHBACK_BUFFER_SIZE = 10*1024;
041 protected PushbackReader m_in;
042 private int m_pos = -1; // current position in reader stream
043
044 protected WikiEngine m_engine;
045 protected WikiContext m_context;
046
047 /** Optionally stores internal wikilinks */
048 protected ArrayList<StringTransmutator> m_localLinkMutatorChain = new ArrayList<StringTransmutator>();
049 protected ArrayList<StringTransmutator> m_externalLinkMutatorChain = new ArrayList<StringTransmutator>();
050 protected ArrayList<StringTransmutator> m_attachmentLinkMutatorChain = new ArrayList<StringTransmutator>();
051 protected ArrayList<HeadingListener> m_headingListenerChain = new ArrayList<HeadingListener>();
052 protected ArrayList<StringTransmutator> m_linkMutators = new ArrayList<StringTransmutator>();
053
054 protected boolean m_inlineImages = true;
055
056 protected boolean m_parseAccessRules = true;
057 /** If set to "true", allows using raw HTML within Wiki text. Be warned,
058 this is a VERY dangerous option to set - never turn this on in a publicly
059 allowable Wiki, unless you are absolutely certain of what you're doing. */
060 public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";
061 /** If set to "true", enables plugins during parsing */
062 public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";
063
064 /** Lists all punctuation characters allowed in WikiMarkup. These
065 will not be cleaned away. This is for compatibility for older versions
066 of JSPWiki. */
067
068 protected static final String LEGACY_CHARS_ALLOWED = "._";
069
070 /** Lists all punctuation characters allowed in page names. */
071 public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$";
072
073 /**
074 * Constructs a MarkupParser. The subclass must call this constructor
075 * to set up the necessary bits and pieces.
076 *
077 * @param context The WikiContext.
078 * @param in The reader from which we are reading the bytes from.
079 */
080 protected MarkupParser( WikiContext context, Reader in )
081 {
082 m_engine = context.getEngine();
083 m_context = context;
084 setInputReader( in );
085 }
086
087 /**
088 * Replaces the current input character stream with a new one.
089 * @param in New source for input. If null, this method does nothing.
090 * @return the old stream
091 */
092 public Reader setInputReader( Reader in )
093 {
094 Reader old = m_in;
095
096 if( in != null )
097 {
098 m_in = new PushbackReader( new BufferedReader( in ),
099 PUSHBACK_BUFFER_SIZE );
100 }
101
102 return old;
103 }
104
105 /**
106 * Adds a hook for processing link texts. This hook is called
107 * when the link text is written into the output stream, and
108 * you may use it to modify the text. It does not affect the
109 * actual link, only the user-visible text.
110 *
111 * @param mutator The hook to call. Null is safe.
112 */
113 public void addLinkTransmutator( StringTransmutator mutator )
114 {
115 if( mutator != null )
116 {
117 m_linkMutators.add( mutator );
118 }
119 }
120
121 /**
122 * Adds a hook for processing local links. The engine
123 * transforms both non-existing and existing page links.
124 *
125 * @param mutator The hook to call. Null is safe.
126 */
127 public void addLocalLinkHook( StringTransmutator mutator )
128 {
129 if( mutator != null )
130 {
131 m_localLinkMutatorChain.add( mutator );
132 }
133 }
134
135 /**
136 * Adds a hook for processing external links. This includes
137 * all http:// ftp://, etc. links, including inlined images.
138 *
139 * @param mutator The hook to call. Null is safe.
140 */
141 public void addExternalLinkHook( StringTransmutator mutator )
142 {
143 if( mutator != null )
144 {
145 m_externalLinkMutatorChain.add( mutator );
146 }
147 }
148
149 /**
150 * Adds a hook for processing attachment links.
151 *
152 * @param mutator The hook to call. Null is safe.
153 */
154 public void addAttachmentLinkHook( StringTransmutator mutator )
155 {
156 if( mutator != null )
157 {
158 m_attachmentLinkMutatorChain.add( mutator );
159 }
160 }
161
162 /**
163 * Adds a HeadingListener to the parser chain. It will be called whenever
164 * a parsed header is found.
165 *
166 * @param listener The listener to add.
167 */
168 public void addHeadingListener( HeadingListener listener )
169 {
170 if( listener != null )
171 {
172 m_headingListenerChain.add( listener );
173 }
174 }
175
176 /**
177 * Disables access rule parsing.
178 */
179 public void disableAccessRules()
180 {
181 m_parseAccessRules = false;
182 }
183
184 /**
185 * Use this to turn on or off image inlining.
186 * @param toggle If true, images are inlined (as per set in jspwiki.properties)
187 * If false, then images won't be inlined; instead, they will be
188 * treated as standard hyperlinks.
189 * @since 2.2.9
190 */
191 public void enableImageInlining( boolean toggle )
192 {
193 m_inlineImages = toggle;
194 }
195
196 /**
197 * Parses the document.
198 * @return the parsed document, as a WikiDocument
199 * @throws IOException If something goes wrong.
200 */
201 public abstract WikiDocument parse()
202 throws IOException;
203
204 /**
205 * Return the current position in the reader stream.
206 * The value will be -1 prior to reading.
207 * @return the reader position as an int.
208 */
209 public int getPosition()
210 {
211 return m_pos;
212 }
213
214 /**
215 * Returns the next token in the stream. This is the most called method
216 * in the entire parser, so it needs to be lean and mean.
217 *
218 * @return The next token in the stream; or, if the stream is ended, -1.
219 * @throws IOException If something bad happens
220 * @throws NullPointerException If you have not yet created an input document.
221 */
222 protected final int nextToken()
223 throws IOException, NullPointerException
224 {
225 // if( m_in == null ) return -1;
226 m_pos++;
227 return m_in.read();
228 }
229
230 /**
231 * Push back any character to the current input. Does not
232 * push back a read EOF, though.
233 *
234 * @param c Character to push back.
235 * @throws IOException In case the character cannot be pushed back.
236 */
237 protected void pushBack( int c )
238 throws IOException
239 {
240 if( c != -1 && m_in != null )
241 {
242 m_pos--;
243 m_in.unread( c );
244 }
245 }
246
247 /**
248 * Cleans a Wiki name. The functionality of this method was changed in 2.6
249 * so that the list of allowed characters is much larger. Use wikifyLink()
250 * to get the legacy behaviour.
251 * <P>
252 * [ This is a link ] -> This is a link
253 *
254 * @param link Link to be cleared. Null is safe, and causes this to return null.
255 * @return A cleaned link.
256 *
257 * @since 2.0
258 */
259 public static String cleanLink( String link )
260 {
261 return cleanLink(link, PUNCTUATION_CHARS_ALLOWED);
262 }
263
264 /**
265 * Cleans a Wiki name based on a list of characters. Also, any multiple
266 * whitespace is collapsed into a single space, and any leading or trailing
267 * space is removed.
268 *
269 * @param link Link to be cleared. Null is safe, and causes this to return null.
270 * @param allowedChars Characters which are allowed in the string.
271 * @return A cleaned link.
272 *
273 * @since 2.6
274 */
275 public static String cleanLink( String link, String allowedChars )
276 {
277 if( link == null ) return null;
278
279 link = link.trim();
280 StringBuffer clean = new StringBuffer(link.length());
281
282 //
283 // Remove non-alphanumeric characters that should not
284 // be put inside WikiNames. Note that all valid
285 // Unicode letters are considered okay for WikiNames.
286 // It is the problem of the WikiPageProvider to take
287 // care of actually storing that information.
288 //
289 // Also capitalize things, if necessary.
290 //
291
292 boolean isWord = true; // If true, we've just crossed a word boundary
293 boolean wasSpace = false;
294
295 for( int i = 0; i < link.length(); i++ )
296 {
297 char ch = link.charAt(i);
298
299 //
300 // Cleans away repetitive whitespace and only uses the first one.
301 //
302 if( Character.isWhitespace(ch) )
303 {
304 if( wasSpace )
305 continue;
306
307 wasSpace = true;
308 }
309 else
310 {
311 wasSpace = false;
312 }
313
314 //
315 // Check if it is allowed to use this char, and capitalize, if necessary.
316 //
317 if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf(ch) != -1 )
318 {
319 // Is a letter
320
321 if( isWord ) ch = Character.toUpperCase( ch );
322 clean.append( ch );
323 isWord = false;
324 }
325 else
326 {
327 isWord = true;
328 }
329 }
330
331 return clean.toString();
332 }
333
334 /**
335 * Cleans away extra legacy characters. This method functions exactly
336 * like pre-2.6 cleanLink()
337 * <P>
338 * [ This is a link ] -> ThisIsALink
339 *
340 * @param link Link to be cleared. Null is safe, and causes this to return null.
341 * @return A cleaned link.
342 * @since 2.6
343 */
344 public static String wikifyLink(String link)
345 {
346 return MarkupParser.cleanLink(link, MarkupParser.LEGACY_CHARS_ALLOWED);
347 }
348
349 }