/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
*/
package org.apache.wiki.parser;

import org.apache.log4j.Logger;
import org.apache.oro.text.GlobCompiler;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternCompiler;
import org.apache.wiki.StringTransmutator;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.util.TextUtil;
import org.jdom2.Element;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

/**
 *  Provides an abstract class for the parser instances. It owns the pushback-capable input reader, tracks the
 *  current read position, keeps the chains of link/heading hooks registered by callers, and offers a few static
 *  helpers for cleaning link names and building error elements.
 *
 *  @since 2.4
 */
public abstract class MarkupParser {

    /** Allow this many characters to be pushed back in the stream. In effect, this limits the size of a single line. */
    protected static final int PUSHBACK_BUFFER_SIZE = 10*1024;

    /** The input being parsed, wrapped so that characters can be pushed back. Stays {@code null} until a non-null reader is set. */
    protected PushbackReader m_in;

    /** Current position in the reader stream; -1 until the first token has been read. */
    private int m_pos = -1;

    /** The engine obtained from the context given to the constructor. */
    protected Engine m_engine;

    /** The context given to the constructor. */
    protected Context m_context;

    /** Hooks registered through {@link #addLocalLinkHook(StringTransmutator)}. */
    protected ArrayList< StringTransmutator > m_localLinkMutatorChain = new ArrayList<>();

    /** Hooks registered through {@link #addExternalLinkHook(StringTransmutator)}. */
    protected ArrayList< StringTransmutator > m_externalLinkMutatorChain = new ArrayList<>();

    /** Hooks registered through {@link #addAttachmentLinkHook(StringTransmutator)}. */
    protected ArrayList< StringTransmutator > m_attachmentLinkMutatorChain = new ArrayList<>();

    /** Hooks registered through {@link #addLinkTransmutator(StringTransmutator)}. */
    protected ArrayList< StringTransmutator > m_linkMutators = new ArrayList<>();

    /** Listeners registered through {@link #addHeadingListener(HeadingListener)}. */
    protected ArrayList< HeadingListener > m_headingListenerChain = new ArrayList<>();

    /** Whether images are inlined; toggled by {@link #enableImageInlining(boolean)}. Defaults to {@code true}. */
    protected boolean m_inlineImages = true;

    /** Whether access rules are parsed; cleared by {@link #disableAccessRules()}. Defaults to {@code true}. */
    protected boolean m_parseAccessRules = true;

    /** Keeps image regexp Patterns. {@code null} until {@link #initInlineImagePatterns()} has run. */
    protected List< Pattern > m_inlineImagePatterns = null;

    /** Link parsing helper bound to the context given to the constructor. */
    protected LinkParsingOperations m_linkParsingOperations;

    private static final Logger log = Logger.getLogger( MarkupParser.class );

    /** If set to "true", allows using raw HTML within Wiki text.  Be warned, this is a VERY dangerous option to set -
        never turn this on in a publicly accessible Wiki, unless you are absolutely certain of what you're doing. */
    public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";

    /** If set to "true", enables plugins during parsing */
    public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";

    /** If true, all outward links (external links) have a small link image appended. */
    public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";

    /** If set to "true", all external links are tagged with 'rel="nofollow"' */
    public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";

    /** If true, consider CamelCase hyperlinks as well. */
    public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";

    /** If true, all hyperlinks are translated as well, regardless whether they
        are surrounded by brackets. */
    public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";

    /** If true, all outward attachment info links have a small link image appended. */
    public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

    /** The value is "hashlink". NOTE(review): presumably the css class used for heading hash links - confirm against the concrete parsers. */
    public static final String HASHLINK = "hashlink";

    /** Name of the outlink image; relative path to the JSPWiki directory. */
    public static final String OUTLINK_IMAGE = "images/out.png";

    /** Outlink css class. */
    public static final String OUTLINK = "outlink";

    /** Engine attribute key under which the compiled inline image patterns are cached. */
    private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";

    /** The value for anchor element {@code class} attributes when used for wiki page (normal) links. The value is "wikipage". */
    public static final String CLASS_WIKIPAGE = "wikipage";

    /** The value for anchor element {@code class} attributes when used for edit page links. The value is "createpage". */
    public static final String CLASS_EDITPAGE = "createpage";

    /** The value for anchor element {@code class} attributes when used for interwiki page links. The value is "interwiki". */
    public static final String CLASS_INTERWIKI = "interwiki";

    /** The value for anchor element {@code class} attributes when used for footnote links. The value is "footnote". */
    public static final String CLASS_FOOTNOTE = "footnote";

    /** The value for anchor element {@code class} attributes when used for footnote reference links. The value is "footnoteref". */
    public static final String CLASS_FOOTNOTE_REF = "footnoteref";

    /** The value for anchor element {@code class} attributes when used for external links. The value is "external". */
    public static final String CLASS_EXTERNAL = "external";

    /** The value for anchor element {@code class} attributes when used for attachments. The value is "attachment". */
    public static final String CLASS_ATTACHMENT = "attachment";

    /**
     *  Table of anchor {@code class} values. NOTE(review): presumably indexed by the numeric link type constants of the
     *  concrete parser, with "" meaning "no class" - confirm against the callers before reordering or extending it.
     */
    public static final String[] CLASS_TYPES = {
        CLASS_WIKIPAGE,
        CLASS_EDITPAGE,
        "",
        CLASS_FOOTNOTE,
        CLASS_FOOTNOTE_REF,
        "",
        CLASS_EXTERNAL,
        CLASS_INTERWIKI,
        CLASS_EXTERNAL,
        CLASS_WIKIPAGE,
        CLASS_ATTACHMENT
    };

    /**
     *  Constructs a MarkupParser.  The subclass must call this constructor to set up the necessary bits and pieces.
     *
     *  @param context The WikiContext. Must not be null, as the engine is fetched from it.
     *  @param in The reader from which the markup is read. May be null, in which case no input is set yet.
     */
    protected MarkupParser( final Context context, final Reader in ) {
        m_engine = context.getEngine();
        m_context = context;
        m_linkParsingOperations = new LinkParsingOperations( m_context );
        // NOTE: setInputReader() is overridable; an overriding subclass will see it invoked before its own
        // fields have been initialised.
        setInputReader( in );
    }

    /**
     *  Replaces the current input character stream with a new one. The read position reported by
     *  {@link #getPosition()} is left untouched.
     *
     *  @param in New source for input.  If null, this method does nothing.
     *  @return the old stream
     */
    public Reader setInputReader( final Reader in ) {
        final Reader old = m_in;
        if( in != null ) {
            m_in = new PushbackReader( new BufferedReader( in ), PUSHBACK_BUFFER_SIZE );
        }

        return old;
    }

    /**
     *  Adds a hook for processing link texts.  This hook is called when the link text is written into the output stream, and
     *  you may use it to modify the text.  It does not affect the actual link, only the user-visible text.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addLinkTransmutator( final StringTransmutator mutator ) {
        addLinkHook( m_linkMutators, mutator );
    }

    /**
     *  Adds a hook for processing local links.  The engine transforms both non-existing and existing page links.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addLocalLinkHook( final StringTransmutator mutator ) {
        addLinkHook( m_localLinkMutatorChain, mutator );
    }

    /**
     *  Adds a hook for processing external links.  This includes all http:// ftp://, etc. links, including inlined images.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addExternalLinkHook( final StringTransmutator mutator ) {
        addLinkHook( m_externalLinkMutatorChain, mutator );
    }

    /**
     *  Adds a hook for processing attachment links.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addAttachmentLinkHook( final StringTransmutator mutator ) {
        addLinkHook( m_attachmentLinkMutatorChain, mutator );
    }

    /**
     *  Appends a hook to the given chain, silently ignoring null hooks.
     *
     *  @param mutatorChain The chain to append to.
     *  @param mutator The hook to append.  Null is safe and results in no change.
     */
    void addLinkHook( final List< StringTransmutator > mutatorChain, final StringTransmutator mutator ) {
        if( mutator != null ) {
            mutatorChain.add( mutator );
        }
    }

    /**
     *  Adds a HeadingListener to the parser chain.  It will be called whenever a parsed header is found.
     *
     *  @param listener The listener to add.  Null is safe and results in no change.
     */
    public void addHeadingListener( final HeadingListener listener ) {
        if( listener != null ) {
            m_headingListenerChain.add( listener );
        }
    }

    /**
     *  Disables access rule parsing.
     */
    public void disableAccessRules() {
        m_parseAccessRules = false;
    }

    /**
     *  Tells whether access rules are parsed.
     *
     *  @return {@code true} unless {@link #disableAccessRules()} has been called.
     */
    public boolean isParseAccessRules() {
        return m_parseAccessRules;
    }

    /**
     *  Use this to turn on or off image inlining.
     *
     *  @param toggle If true, images are inlined (as per set in jspwiki.properties)
     *                If false, then images won't be inlined; instead, they will be
     *                treated as standard hyperlinks.
     *  @since 2.2.9
     */
    public void enableImageInlining( final boolean toggle ) {
        m_inlineImages = toggle;
    }

    /**
     *  Tells whether image inlining is turned on.
     *
     *  @return the value last set with {@link #enableImageInlining(boolean)}; {@code true} by default.
     */
    public boolean isImageInlining() {
        return m_inlineImages;
    }

    /**
     *  Populates {@link #m_inlineImagePatterns} with an unmodifiable view of the compiled inline image patterns.
     *  The compiled list is looked up from (and, when missing, stored into) an engine attribute so that the
     *  compilation happens only once per engine. Patterns that fail to compile are logged and skipped.
     */
    protected final void initInlineImagePatterns() {
        // We cache compiled patterns in the engine, since their creation is really expensive
        List< Pattern > compiledpatterns = m_engine.getAttribute( INLINE_IMAGE_PATTERNS );

        if( compiledpatterns == null ) {
            // The compiler is only needed on a cache miss, so it is not created otherwise.
            final PatternCompiler compiler = new GlobCompiler();
            compiledpatterns = new ArrayList< >( 20 );
            final Collection< String > ptrns = m_engine.getAllInlinedImagePatterns();

            //  Make them into Regexp Patterns.  Unknown patterns are ignored.
            for( final String pattern : ptrns ) {
                try {
                    compiledpatterns.add( compiler.compile( pattern, GlobCompiler.DEFAULT_MASK | GlobCompiler.READ_ONLY_MASK ) );
                } catch( final MalformedPatternException e ) {
                    log.error( "Malformed pattern [" + pattern + "] in properties: ", e );
                }
            }

            m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns );
        }

        m_inlineImagePatterns = Collections.unmodifiableList( compiledpatterns );
    }

    /**
     *  Returns the compiled inline image patterns, initialising them on first use.
     *
     *  @return an unmodifiable list of the compiled patterns.
     */
    public List< Pattern > getInlineImagePatterns() {
        if( m_inlineImagePatterns == null ) {
            initInlineImagePatterns();
        }
        return m_inlineImagePatterns;
    }

    /**
     *  Parses the document.
     *
     *  @return the parsed document, as a WikiDocument
     *  @throws IOException If something goes wrong.
     */
    public abstract WikiDocument parse() throws IOException;

    /**
     *  Return the current position in the reader stream. The value will be -1 prior to reading.
     *
     *  @return the reader position as an int.
     */
    public int getPosition() {
        return m_pos;
    }

    /**
     *  Returns the next token in the stream.  This is the most called method in the entire parser, so it needs to be lean and mean.
     *  The position counter is advanced for every successful read, including the one that reports end of stream.
     *
     *  @return The next token in the stream; or, if the stream is ended, -1.
     *  @throws IOException If something bad happens
     *  @throws NullPointerException If you have not yet created an input document.
     */
    protected final int nextToken() throws IOException, NullPointerException {
        // Deliberately no null check on m_in: a missing input surfaces as the documented NullPointerException.
        final int token = m_in.read();
        // Advance only after the read succeeded, so a failed read leaves the position untouched.
        m_pos++;
        return token;
    }

    /**
     *  Push back any character to the current input.  Does not push back a read EOF, though.
     *  Nothing happens either when no input has been set.
     *
     *  @param c Character to push back.
     *  @throws IOException In case the character cannot be pushed back.
     */
    protected void pushBack( final int c ) throws IOException {
        if( c != -1 && m_in != null ) {
            m_in.unread( c );
            // Rewind only after the unread succeeded, so a failed push-back leaves the position untouched.
            m_pos--;
        }
    }

    /**
     *  Writes HTML for error message.  Does not add it to the document, you have to do it yourself.
     *
     *  @param error The error string.
     *  @return A {@code span} Element with class "error" containing the error text.
     */
    public static Element makeError( final String error ) {
        return new Element( "span" ).setAttribute( "class", "error" ).addContent( error );
    }

    /**
     *  Cleans a Wiki name.  The functionality of this method was changed in 2.6 so that the list of allowed characters is much larger.
     *  Use {@link #wikifyLink(String)} to get the legacy behaviour.
     *  <P>
     *  {@code [ This is a link ] -> This is a link}
     *
     *  @param link Link to be cleared. Null is safe, and causes this to return null.
     *  @return A cleaned link.
     *
     *  @since 2.0
     */
    public static String cleanLink( final String link ) {
        return TextUtil.cleanString( link, TextUtil.PUNCTUATION_CHARS_ALLOWED );
    }

    /**
     *  Cleans away extra legacy characters.  This method functions exactly like pre-2.6 cleanLink()
     *  <P>
     *  {@code [ This is a link ] -> ThisIsALink}
     *
     *  @param link Link to be cleared. Null is safe, and causes this to return null.
     *  @return A cleaned link.
     *  @since 2.6
     */
    public static String wikifyLink( final String link ) {
        return TextUtil.cleanString( link, TextUtil.LEGACY_CHARS_ALLOWED );
    }

}