001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018*/ 019package org.apache.wiki.parser; 020 021import java.io.BufferedReader; 022import java.io.IOException; 023import java.io.PushbackReader; 024import java.io.Reader; 025import java.util.ArrayList; 026 027import org.apache.wiki.StringTransmutator; 028import org.apache.wiki.WikiContext; 029import org.apache.wiki.WikiEngine; 030import org.jdom2.Element; 031 032/** 033 * Provides an abstract class for the parser instances. 034 * 035 * @since 2.4 036 */ 037public abstract class MarkupParser 038{ 039 /** Allow this many characters to be pushed back in the stream. In effect, 040 this limits the size of a single line. */ 041 protected static final int PUSHBACK_BUFFER_SIZE = 10*1024; 042 protected PushbackReader m_in; 043 private int m_pos = -1; // current position in reader stream 044 045 protected WikiEngine m_engine; 046 protected WikiContext m_context; 047 048 /** Optionally stores internal wikilinks */ 049 protected ArrayList<StringTransmutator> m_localLinkMutatorChain = new ArrayList<StringTransmutator>(); 050 protected ArrayList<StringTransmutator> m_externalLinkMutatorChain = new ArrayList<StringTransmutator>(); 051 protected ArrayList<StringTransmutator> m_attachmentLinkMutatorChain = new ArrayList<StringTransmutator>(); 052 protected ArrayList<HeadingListener> m_headingListenerChain = new ArrayList<HeadingListener>(); 053 protected ArrayList<StringTransmutator> m_linkMutators = new ArrayList<StringTransmutator>(); 054 055 protected boolean m_inlineImages = true; 056 057 protected boolean m_parseAccessRules = true; 058 /** If set to "true", allows using raw HTML within Wiki text. Be warned, 059 this is a VERY dangerous option to set - never turn this on in a publicly 060 allowable Wiki, unless you are absolutely certain of what you're doing. */ 061 public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML"; 062 /** If set to "true", enables plugins during parsing */ 063 public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins"; 064 065 /** Lists all punctuation characters allowed in WikiMarkup. These 066 will not be cleaned away. This is for compatibility for older versions 067 of JSPWiki. */ 068 069 protected static final String LEGACY_CHARS_ALLOWED = "._"; 070 071 /** Lists all punctuation characters allowed in page names. */ 072 public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$"; 073 074 /** 075 * Constructs a MarkupParser. The subclass must call this constructor 076 * to set up the necessary bits and pieces. 077 * 078 * @param context The WikiContext. 079 * @param in The reader from which we are reading the bytes from. 080 */ 081 protected MarkupParser( WikiContext context, Reader in ) 082 { 083 m_engine = context.getEngine(); 084 m_context = context; 085 setInputReader( in ); 086 } 087 088 /** 089 * Replaces the current input character stream with a new one. 090 * @param in New source for input. If null, this method does nothing. 091 * @return the old stream 092 */ 093 public Reader setInputReader( Reader in ) 094 { 095 Reader old = m_in; 096 097 if( in != null ) 098 { 099 m_in = new PushbackReader( new BufferedReader( in ), 100 PUSHBACK_BUFFER_SIZE ); 101 } 102 103 return old; 104 } 105 106 /** 107 * Adds a hook for processing link texts. This hook is called 108 * when the link text is written into the output stream, and 109 * you may use it to modify the text. It does not affect the 110 * actual link, only the user-visible text. 111 * 112 * @param mutator The hook to call. Null is safe. 113 */ 114 public void addLinkTransmutator( StringTransmutator mutator ) 115 { 116 if( mutator != null ) 117 { 118 m_linkMutators.add( mutator ); 119 } 120 } 121 122 /** 123 * Adds a hook for processing local links. The engine 124 * transforms both non-existing and existing page links. 125 * 126 * @param mutator The hook to call. Null is safe. 127 */ 128 public void addLocalLinkHook( StringTransmutator mutator ) 129 { 130 if( mutator != null ) 131 { 132 m_localLinkMutatorChain.add( mutator ); 133 } 134 } 135 136 /** 137 * Adds a hook for processing external links. This includes 138 * all http:// ftp://, etc. links, including inlined images. 139 * 140 * @param mutator The hook to call. Null is safe. 141 */ 142 public void addExternalLinkHook( StringTransmutator mutator ) 143 { 144 if( mutator != null ) 145 { 146 m_externalLinkMutatorChain.add( mutator ); 147 } 148 } 149 150 /** 151 * Adds a hook for processing attachment links. 152 * 153 * @param mutator The hook to call. Null is safe. 154 */ 155 public void addAttachmentLinkHook( StringTransmutator mutator ) 156 { 157 if( mutator != null ) 158 { 159 m_attachmentLinkMutatorChain.add( mutator ); 160 } 161 } 162 163 /** 164 * Adds a HeadingListener to the parser chain. It will be called whenever 165 * a parsed header is found. 166 * 167 * @param listener The listener to add. 168 */ 169 public void addHeadingListener( HeadingListener listener ) 170 { 171 if( listener != null ) 172 { 173 m_headingListenerChain.add( listener ); 174 } 175 } 176 177 /** 178 * Disables access rule parsing. 179 */ 180 public void disableAccessRules() 181 { 182 m_parseAccessRules = false; 183 } 184 185 /** 186 * Use this to turn on or off image inlining. 187 * @param toggle If true, images are inlined (as per set in jspwiki.properties) 188 * If false, then images won't be inlined; instead, they will be 189 * treated as standard hyperlinks. 190 * @since 2.2.9 191 */ 192 public void enableImageInlining( boolean toggle ) 193 { 194 m_inlineImages = toggle; 195 } 196 197 /** 198 * Parses the document. 199 * @return the parsed document, as a WikiDocument 200 * @throws IOException If something goes wrong. 201 */ 202 public abstract WikiDocument parse() 203 throws IOException; 204 205 /** 206 * Return the current position in the reader stream. 207 * The value will be -1 prior to reading. 208 * @return the reader position as an int. 209 */ 210 public int getPosition() 211 { 212 return m_pos; 213 } 214 215 /** 216 * Returns the next token in the stream. This is the most called method 217 * in the entire parser, so it needs to be lean and mean. 218 * 219 * @return The next token in the stream; or, if the stream is ended, -1. 220 * @throws IOException If something bad happens 221 * @throws NullPointerException If you have not yet created an input document. 222 */ 223 protected final int nextToken() 224 throws IOException, NullPointerException 225 { 226 // if( m_in == null ) return -1; 227 m_pos++; 228 return m_in.read(); 229 } 230 231 /** 232 * Push back any character to the current input. Does not 233 * push back a read EOF, though. 234 * 235 * @param c Character to push back. 236 * @throws IOException In case the character cannot be pushed back. 237 */ 238 protected void pushBack( int c ) 239 throws IOException 240 { 241 if( c != -1 && m_in != null ) 242 { 243 m_pos--; 244 m_in.unread( c ); 245 } 246 } 247 248 /** 249 * Writes HTML for error message. Does not add it to the document, you 250 * have to do it yourself. 251 * 252 * @param error The error string. 253 * @return An Element containing the error. 254 */ 255 256 public static Element makeError( String error ) 257 { 258 return new Element("span").setAttribute("class","error").addContent(error); 259 } 260 261 /** 262 * Cleans a Wiki name. The functionality of this method was changed in 2.6 263 * so that the list of allowed characters is much larger. Use wikifyLink() 264 * to get the legacy behaviour. 265 * <P> 266 * [ This is a link ] -> This is a link 267 * 268 * @param link Link to be cleared. Null is safe, and causes this to return null. 269 * @return A cleaned link. 270 * 271 * @since 2.0 272 */ 273 public static String cleanLink( String link ) 274 { 275 return cleanLink(link, PUNCTUATION_CHARS_ALLOWED); 276 } 277 278 /** 279 * Cleans a Wiki name based on a list of characters. Also, any multiple 280 * whitespace is collapsed into a single space, and any leading or trailing 281 * space is removed. 282 * 283 * @param link Link to be cleared. Null is safe, and causes this to return null. 284 * @param allowedChars Characters which are allowed in the string. 285 * @return A cleaned link. 286 * 287 * @since 2.6 288 */ 289 public static String cleanLink( String link, String allowedChars ) 290 { 291 if( link == null ) return null; 292 293 link = link.trim(); 294 StringBuilder clean = new StringBuilder(link.length()); 295 296 // 297 // Remove non-alphanumeric characters that should not 298 // be put inside WikiNames. Note that all valid 299 // Unicode letters are considered okay for WikiNames. 300 // It is the problem of the WikiPageProvider to take 301 // care of actually storing that information. 302 // 303 // Also capitalize things, if necessary. 304 // 305 306 boolean isWord = true; // If true, we've just crossed a word boundary 307 boolean wasSpace = false; 308 309 for( int i = 0; i < link.length(); i++ ) 310 { 311 char ch = link.charAt(i); 312 313 // 314 // Cleans away repetitive whitespace and only uses the first one. 315 // 316 if( Character.isWhitespace(ch) ) 317 { 318 if( wasSpace ) 319 continue; 320 321 wasSpace = true; 322 } 323 else 324 { 325 wasSpace = false; 326 } 327 328 // 329 // Check if it is allowed to use this char, and capitalize, if necessary. 330 // 331 if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf(ch) != -1 ) 332 { 333 // Is a letter 334 335 if( isWord ) ch = Character.toUpperCase( ch ); 336 clean.append( ch ); 337 isWord = false; 338 } 339 else 340 { 341 isWord = true; 342 } 343 } 344 345 return clean.toString(); 346 } 347 348 /** 349 * Cleans away extra legacy characters. This method functions exactly 350 * like pre-2.6 cleanLink() 351 * <P> 352 * [ This is a link ] -> ThisIsALink 353 * 354 * @param link Link to be cleared. Null is safe, and causes this to return null. 355 * @return A cleaned link. 356 * @since 2.6 357 */ 358 public static String wikifyLink(String link) 359 { 360 return MarkupParser.cleanLink(link, MarkupParser.LEGACY_CHARS_ALLOWED); 361 } 362 363}