/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;

import org.apache.wiki.StringTransmutator;
import org.apache.wiki.WikiContext;
import org.apache.wiki.WikiEngine;

/**
 *  Provides an abstract class for the parser instances.
 *  <p>
 *  The class owns the pushback-capable input reader, tracks the current read
 *  position, keeps the chains of link/heading hooks that callers register, and
 *  offers the static helpers used to clean page names.  Concrete parsers
 *  implement {@link #parse()}.
 *
 *  @since 2.4
 */
public abstract class MarkupParser
{
    /** Allow this many characters to be pushed back in the stream.  In effect,
        this limits the size of a single line.  */
    protected static final int PUSHBACK_BUFFER_SIZE = 10*1024;

    /** The input being parsed, wrapped by {@link #setInputReader(Reader)};
        stays null until a non-null reader has been supplied. */
    protected PushbackReader m_in;

    /** Current position in the reader stream; -1 before anything has been read. */
    private int m_pos = -1;

    /** The engine obtained from the WikiContext given to the constructor. */
    protected WikiEngine m_engine;

    /** The WikiContext given to the constructor. */
    protected WikiContext m_context;

    /** Hooks registered through {@link #addLocalLinkHook(StringTransmutator)}. */
    protected ArrayList<StringTransmutator> m_localLinkMutatorChain = new ArrayList<StringTransmutator>();

    /** Hooks registered through {@link #addExternalLinkHook(StringTransmutator)}. */
    protected ArrayList<StringTransmutator> m_externalLinkMutatorChain = new ArrayList<StringTransmutator>();

    /** Hooks registered through {@link #addAttachmentLinkHook(StringTransmutator)}. */
    protected ArrayList<StringTransmutator> m_attachmentLinkMutatorChain = new ArrayList<StringTransmutator>();

    /** Listeners registered through {@link #addHeadingListener(HeadingListener)}. */
    protected ArrayList<HeadingListener> m_headingListenerChain = new ArrayList<HeadingListener>();

    /** Hooks registered through {@link #addLinkTransmutator(StringTransmutator)}. */
    protected ArrayList<StringTransmutator> m_linkMutators = new ArrayList<StringTransmutator>();

    /** Image inlining switch; see {@link #enableImageInlining(boolean)}.  Defaults to true. */
    protected boolean m_inlineImages = true;

    /** Access rule parsing switch; cleared by {@link #disableAccessRules()}.  Defaults to true. */
    protected boolean m_parseAccessRules = true;

    /** If set to "true", allows using raw HTML within Wiki text.  Be warned,
        this is a VERY dangerous option to set - never turn this on in a publicly
        allowable Wiki, unless you are absolutely certain of what you're doing. */
    public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";

    /** If set to "true", enables plugins during parsing */
    public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";

    /** Lists all punctuation characters allowed in WikiMarkup.  These
        will not be cleaned away.  This is for compatibility for older versions
        of JSPWiki. */
    protected static final String LEGACY_CHARS_ALLOWED = "._";

    /** Lists all punctuation characters allowed in page names. */
    public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$";

    /**
     *  Constructs a MarkupParser.  The subclass must call this constructor
     *  to set up the necessary bits and pieces.
     *
     *  @param context The WikiContext.  Must not be null, since the engine is
     *                 fetched from it.
     *  @param in The reader from which we are reading the characters.  If null,
     *            no input is installed (see {@link #setInputReader(Reader)}).
     */
    protected MarkupParser( WikiContext context, Reader in )
    {
        m_engine = context.getEngine();
        m_context = context;
        setInputReader( in );
    }

    /**
     *  Replaces the current input character stream with a new one.  The new
     *  reader is wrapped in a BufferedReader and a PushbackReader of
     *  {@link #PUSHBACK_BUFFER_SIZE} characters.
     *
     *  @param in New source for input.  If null, this method does nothing.
     *  @return the old stream (null if none had been set)
     */
    public Reader setInputReader( Reader in )
    {
        Reader old = m_in;

        if( in != null )
        {
            m_in = new PushbackReader( new BufferedReader( in ),
                                       PUSHBACK_BUFFER_SIZE );
        }

        return old;
    }

    /**
     *  Adds a hook for processing link texts.  This hook is called
     *  when the link text is written into the output stream, and
     *  you may use it to modify the text.  It does not affect the
     *  actual link, only the user-visible text.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addLinkTransmutator( StringTransmutator mutator )
    {
        if( mutator != null )
        {
            m_linkMutators.add( mutator );
        }
    }

    /**
     *  Adds a hook for processing local links.  The engine
     *  transforms both non-existing and existing page links.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addLocalLinkHook( StringTransmutator mutator )
    {
        if( mutator != null )
        {
            m_localLinkMutatorChain.add( mutator );
        }
    }

    /**
     *  Adds a hook for processing external links.  This includes
     *  all http:// ftp://, etc. links, including inlined images.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addExternalLinkHook( StringTransmutator mutator )
    {
        if( mutator != null )
        {
            m_externalLinkMutatorChain.add( mutator );
        }
    }

    /**
     *  Adds a hook for processing attachment links.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addAttachmentLinkHook( StringTransmutator mutator )
    {
        if( mutator != null )
        {
            m_attachmentLinkMutatorChain.add( mutator );
        }
    }

    /**
     *  Adds a HeadingListener to the parser chain.  It will be called whenever
     *  a parsed header is found.
     *
     *  @param listener The listener to add.  Null is safe.
     */
    public void addHeadingListener( HeadingListener listener )
    {
        if( listener != null )
        {
            m_headingListenerChain.add( listener );
        }
    }

    /**
     *  Disables access rule parsing.
     */
    public void disableAccessRules()
    {
        m_parseAccessRules = false;
    }

    /**
     *  Use this to turn on or off image inlining.
     *
     *  @param toggle If true, images are inlined (as per set in jspwiki.properties)
     *                If false, then images won't be inlined; instead, they will be
     *                treated as standard hyperlinks.
     *  @since 2.2.9
     */
    public void enableImageInlining( boolean toggle )
    {
        m_inlineImages = toggle;
    }

    /**
     *  Parses the document.
     *
     *  @return the parsed document, as a WikiDocument
     *  @throws IOException If something goes wrong.
     */
    public abstract WikiDocument parse()
        throws IOException;

    /**
     *  Return the current position in the reader stream.
     *  The value will be -1 prior to reading.
     *
     *  @return the reader position as an int.
     */
    public int getPosition()
    {
        return m_pos;
    }

    /**
     *  Returns the next token in the stream.  This is the most called method
     *  in the entire parser, so it needs to be lean and mean.
     *  <p>
     *  The position is advanced by one for every read that completes,
     *  including a read that reports the end of the stream.  A read that
     *  fails with an exception leaves the position untouched.
     *
     *  @return The next token in the stream; or, if the stream is ended, -1.
     *  @throws IOException If something bad happens
     *  @throws NullPointerException If you have not yet created an input document.
     */
    protected final int nextToken()
        throws IOException, NullPointerException
    {
        // Deliberately no null check on m_in: a missing reader is reported
        // through the documented NullPointerException.
        final int token = m_in.read();

        // Count the character only once the read has actually succeeded.
        m_pos++;

        return token;
    }

    /**
     *  Push back any character to the current input.  Does not
     *  push back a read EOF, though.  Does nothing if no input
     *  reader has been set.
     *
     *  @param c Character to push back.
     *  @throws IOException In case the character cannot be pushed back.
     */
    protected void pushBack( int c )
        throws IOException
    {
        if( c != -1 && m_in != null )
        {
            // Unread first: if the pushback buffer is full this throws, and
            // the position must then stay in step with the stream.
            m_in.unread( c );
            m_pos--;
        }
    }

    /**
     *  Cleans a Wiki name.  The functionality of this method was changed in 2.6
     *  so that the list of allowed characters is much larger.  Use wikifyLink()
     *  to get the legacy behaviour.
     *  <P>
     *  [ This is a link ] -&gt; This is a link
     *
     *  @param link Link to be cleared. Null is safe, and causes this to return null.
     *  @return A cleaned link.
     *
     *  @since 2.0
     */
    public static String cleanLink( String link )
    {
        return cleanLink( link, PUNCTUATION_CHARS_ALLOWED );
    }

    /**
     *  Cleans a Wiki name based on a list of characters.
     *  <p>
     *  The link is trimmed first.  Letters, digits and the characters listed
     *  in <code>allowedChars</code> are kept; everything else is dropped.  Within
     *  a run of whitespace only the first whitespace character is looked at,
     *  the rest are skipped.  The first kept character, and every kept
     *  character that directly follows a dropped one, is converted to upper case.
     *
     *  @param link Link to be cleared. Null is safe, and causes this to return null.
     *  @param allowedChars Characters which are allowed in the string.  Null is
     *                      treated as an empty list, i.e. only letters and digits
     *                      are kept.
     *  @return A cleaned link.
     *
     *  @since 2.6
     */
    public static String cleanLink( String link, String allowedChars )
    {
        if( link == null )
        {
            return null;
        }

        final String allowed = ( allowedChars != null ) ? allowedChars : "";
        final String trimmed = link.trim();
        final StringBuilder clean = new StringBuilder( trimmed.length() );

        //
        //  Remove non-alphanumeric characters that should not
        //  be put inside WikiNames.  Note that all valid
        //  Unicode letters are considered okay for WikiNames.
        //  It is the problem of the WikiPageProvider to take
        //  care of actually storing that information.
        //
        //  Also capitalize things, if necessary.
        //

        boolean isWord   = true;  // If true, we've just crossed a word boundary
        boolean wasSpace = false; // If true, the previous character was whitespace

        for( int i = 0; i < trimmed.length(); i++ )
        {
            char ch = trimmed.charAt( i );

            //
            //  Cleans away repetitive whitespace and only uses the first one.
            //
            if( Character.isWhitespace( ch ) )
            {
                if( wasSpace )
                {
                    continue;
                }

                wasSpace = true;
            }
            else
            {
                wasSpace = false;
            }

            //
            //  Check if it is allowed to use this char, and capitalize, if necessary.
            //
            if( Character.isLetterOrDigit( ch ) || allowed.indexOf( ch ) != -1 )
            {
                if( isWord )
                {
                    ch = Character.toUpperCase( ch );
                }

                clean.append( ch );
                isWord = false;
            }
            else
            {
                // A dropped character marks a word boundary.
                isWord = true;
            }
        }

        return clean.toString();
    }

    /**
     *  Cleans away extra legacy characters.  This method functions exactly
     *  like pre-2.6 cleanLink()
     *  <P>
     *  [ This is a link ] -&gt; ThisIsALink
     *
     *  @param link Link to be cleared. Null is safe, and causes this to return null.
     *  @return A cleaned link.
     *  @since 2.6
     */
    public static String wikifyLink( String link )
    {
        return MarkupParser.cleanLink( link, MarkupParser.LEGACY_CHARS_ALLOWED );
    }

}