/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
*/
package org.apache.wiki.parser;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.oro.text.GlobCompiler;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternCompiler;
import org.apache.wiki.StringTransmutator;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.util.TextUtil;
import org.jdom2.Element;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

/**
 *   Provides an abstract class for the parser instances. It owns the pushback-capable input reader, tracks the current read
 *   position, keeps the registered link / heading hooks and exposes the shared configuration property names and CSS class constants.
 *
 *   @since  2.4
 */
public abstract class MarkupParser {

    /** Allow this many characters to be pushed back in the stream.  In effect, this limits the size of a single line.  */
    protected static final int PUSHBACK_BUFFER_SIZE = 10*1024;

    /** The input being parsed, wrapped so that characters can be pushed back; replaced by {@link #setInputReader(Reader)}. */
    protected PushbackReader m_in;

    /** Current position in the reader stream; -1 until the first call to {@link #nextToken()}. */
    private int m_pos = -1;

    /** The engine obtained from the context given to the constructor. */
    protected final Engine m_engine;

    /** The context given to the constructor. */
    protected final Context m_context;

    /** Hooks registered through {@link #addLocalLinkHook(StringTransmutator)}. */
    protected final ArrayList< StringTransmutator > m_localLinkMutatorChain = new ArrayList<>();

    /** Hooks registered through {@link #addExternalLinkHook(StringTransmutator)}. */
    protected final ArrayList< StringTransmutator > m_externalLinkMutatorChain = new ArrayList<>();

    /** Hooks registered through {@link #addAttachmentLinkHook(StringTransmutator)}. */
    protected final ArrayList< StringTransmutator > m_attachmentLinkMutatorChain = new ArrayList<>();

    /** Hooks registered through {@link #addLinkTransmutator(StringTransmutator)}. */
    protected final ArrayList< StringTransmutator > m_linkMutators = new ArrayList<>();

    /** Listeners registered through {@link #addHeadingListener(HeadingListener)}. */
    protected final ArrayList< HeadingListener > m_headingListenerChain = new ArrayList<>();

    /** Whether images are inlined; toggled by {@link #enableImageInlining(boolean)}. Defaults to {@code true}. */
    protected boolean m_inlineImages = true;

    /** Whether access rules are parsed; cleared by {@link #disableAccessRules()}. Defaults to {@code true}. */
    protected boolean m_parseAccessRules = true;

    /** Keeps image regexp Patterns; {@code null} until {@link #initInlineImagePatterns()} has run. */
    protected List< Pattern > m_inlineImagePatterns;

    /** Link parsing helper bound to the context given to the constructor. */
    protected final LinkParsingOperations m_linkParsingOperations;

    private static final Logger LOG = LogManager.getLogger( MarkupParser.class );

    /** If set to "true", allows using raw HTML within Wiki text.  Be warned, this is a VERY dangerous option to set -
     *  never turn this on in a publicly accessible Wiki, unless you are absolutely certain of what you're doing. */
    public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";

    /** If set to "true", enables plugins during parsing */
    public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";

    /** If true, all outward links (external links) have a small link image appended. */
    public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";

    /** If set to "true", all external links are tagged with 'rel="nofollow"' */
    public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";

    /** If true, consider CamelCase hyperlinks as well. */
    public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";

    /** If true, all hyperlinks are translated as well, regardless whether they are surrounded by brackets. */
    public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";

    /** If true, all outward attachment info links have a small link image appended. */
    public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

    /** The value is "hashlink". */
    // NOTE(review): presumably the CSS class used for heading anchor links - confirm against the usages.
    public static final String HASHLINK = "hashlink";

    /** Name of the outlink image; relative path to the JSPWiki directory. */
    public static final String OUTLINK_IMAGE = "images/out.png";

    /** Outlink css class. */
    public static final String OUTLINK = "outlink";

    /** Engine attribute name under which the compiled inline image patterns are cached. */
    private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";

    /** The value for anchor element {@code class} attributes when used for wiki page (normal) links. The value is "wikipage". */
    public static final String CLASS_WIKIPAGE = "wikipage";

    /** The value for anchor element {@code class} attributes when used for edit page links. The value is "createpage". */
    public static final String CLASS_EDITPAGE = "createpage";

    /** The value for anchor element {@code class} attributes when used for interwiki page links. The value is "interwiki". */
    public static final String CLASS_INTERWIKI = "interwiki";

    /** The value for anchor element {@code class} attributes when used for footnote links. The value is "footnote". */
    public static final String CLASS_FOOTNOTE = "footnote";

    /** The value for anchor element {@code class} attributes when used for footnote reference links. The value is "footnoteref". */
    public static final String CLASS_FOOTNOTE_REF = "footnoteref";

    /** The value for anchor element {@code class} attributes when used for external links. The value is "external". */
    public static final String CLASS_EXTERNAL = "external";

    /** The value for anchor element {@code class} attributes when used for attachments. The value is "attachment". */
    public static final String CLASS_ATTACHMENT = "attachment";

    /** Lookup table of anchor {@code class} values; two slots hold the empty string. */
    // NOTE(review): presumably indexed by the link-type constants declared by the concrete parsers - confirm before reordering.
    public static final String[] CLASS_TYPES = {
        CLASS_WIKIPAGE,
        CLASS_EDITPAGE,
        "",
        CLASS_FOOTNOTE,
        CLASS_FOOTNOTE_REF,
        "",
        CLASS_EXTERNAL,
        CLASS_INTERWIKI,
        CLASS_EXTERNAL,
        CLASS_WIKIPAGE,
        CLASS_ATTACHMENT
    };

    /**
     *  Constructs a MarkupParser.  The subclass must call this constructor to set up the necessary bits and pieces.
     *
     *  @param context The WikiContext. Must not be null, as the engine is read from it.
     *  @param in The reader from which we are reading the characters from. If null, no input is set up.
     */
    protected MarkupParser( final Context context, final Reader in ) {
        m_engine = context.getEngine();
        m_context = context;
        m_linkParsingOperations = new LinkParsingOperations( m_context );
        setInputReader( in );
    }

    /**
     *  Replaces the current input character stream with a new one. The new reader is wrapped in a buffered
     *  {@link PushbackReader} able to hold {@link #PUSHBACK_BUFFER_SIZE} pushed-back characters. The position
     *  reported by {@link #getPosition()} is left untouched.
     *
     *  @param in New source for input.  If null, this method does nothing.
     *  @return the old stream (the previously installed wrapper, or null if there was none)
     */
    public Reader setInputReader( final Reader in ) {
        final Reader old = m_in;
        if( in != null ) {
            m_in = new PushbackReader( new BufferedReader( in ), PUSHBACK_BUFFER_SIZE );
        }

        return old;
    }

    /**
     *  Adds a hook for processing link texts.  This hook is called when the link text is written into the output stream, and
     *  you may use it to modify the text.  It does not affect the actual link, only the user-visible text.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addLinkTransmutator( final StringTransmutator mutator ) {
        addLinkHook( m_linkMutators, mutator );
    }

    /**
     *  Adds a hook for processing local links.  The engine transforms both non-existing and existing page links.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addLocalLinkHook( final StringTransmutator mutator ) {
        addLinkHook( m_localLinkMutatorChain, mutator );
    }

    /**
     *  Adds a hook for processing external links.  This includes all http:// ftp://, etc. links, including inlined images.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addExternalLinkHook( final StringTransmutator mutator ) {
        addLinkHook( m_externalLinkMutatorChain, mutator );
    }

    /**
     *  Adds a hook for processing attachment links.
     *
     *  @param mutator The hook to call.  Null is safe.
     */
    public void addAttachmentLinkHook( final StringTransmutator mutator ) {
        addLinkHook( m_attachmentLinkMutatorChain, mutator );
    }

    /**
     *  Appends the given hook to the given chain, silently ignoring null hooks.
     *
     *  @param mutatorChain The chain to append to.
     *  @param mutator The hook to add.  Null is safe.
     */
    void addLinkHook( final List< StringTransmutator > mutatorChain, final StringTransmutator mutator ) {
        if( mutator != null ) {
            mutatorChain.add( mutator );
        }
    }

    /**
     *  Adds a HeadingListener to the parser chain.  It will be called whenever a parsed header is found.
     *
     *  @param listener The listener to add.  Null is safe.
     */
    public void addHeadingListener( final HeadingListener listener ) {
        if( listener != null ) {
            m_headingListenerChain.add( listener );
        }
    }

    /**
     *  Disables access rule parsing.
     */
    public void disableAccessRules() {
        m_parseAccessRules = false;
    }

    /**
     *  Tells whether access rules are parsed.
     *
     *  @return {@code true} unless {@link #disableAccessRules()} has been called.
     */
    public boolean isParseAccessRules() {
        return m_parseAccessRules;
    }

    /**
     *  Use this to turn on or off image inlining.
     *
     *  @param toggle If true, images are inlined (as per set in jspwiki.properties)
     *                If false, then images won't be inlined; instead, they will be
     *                treated as standard hyperlinks.
     *  @since 2.2.9
     */
    public void enableImageInlining( final boolean toggle ) {
        m_inlineImages = toggle;
    }

    /**
     *  Tells whether image inlining is currently enabled.
     *
     *  @return the value last set through {@link #enableImageInlining(boolean)}, {@code true} by default.
     */
    public boolean isImageInlining() {
        return m_inlineImages;
    }

    /**
     *  Populates {@link #m_inlineImagePatterns} with an unmodifiable view of the compiled inline image patterns. The compiled
     *  list is cached as an engine attribute; it is only built (from the engine's inlined image glob patterns) when the
     *  attribute is not present yet. Patterns that fail to compile are logged and skipped.
     */
    protected final void initInlineImagePatterns() {
        final PatternCompiler compiler = new GlobCompiler();

        //  We cache compiled patterns in the engine, since their creation is really expensive
        List< Pattern > compiledpatterns = m_engine.getAttribute( INLINE_IMAGE_PATTERNS );

        if( compiledpatterns == null ) {
            compiledpatterns = new ArrayList< >( 20 );
            final Collection< String > ptrns = m_engine.getAllInlinedImagePatterns();

            //  Make them into Regexp Patterns.  Unknown patterns are ignored.
            for( final String pattern : ptrns ) {
                try {
                    compiledpatterns.add( compiler.compile( pattern, GlobCompiler.DEFAULT_MASK | GlobCompiler.READ_ONLY_MASK ) );
                } catch( final MalformedPatternException e ) {
                    // Trailing throwable is not consumed by a placeholder, so it is logged as the exception.
                    LOG.error( "Malformed pattern [{}] in properties: ", pattern, e );
                }
            }

            m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns );
        }

        m_inlineImagePatterns = Collections.unmodifiableList( compiledpatterns );
    }

    /**
     *  Returns the compiled inline image patterns, initializing them on first use.
     *
     *  @return an unmodifiable list of compiled patterns.
     */
    public List< Pattern > getInlineImagePatterns() {
        if( m_inlineImagePatterns == null ) {
            initInlineImagePatterns();
        }
        return m_inlineImagePatterns;
    }

    /**
     *  Parses the document.
     *
     *  @return the parsed document, as a WikiDocument
     *  @throws IOException If something goes wrong.
     */
    public abstract WikiDocument parse() throws IOException;

    /**
     *  Return the current position in the reader stream. The value will be -1 prior to reading.
     *
     *  @return the reader position as an int.
     */
    public int getPosition() {
        return m_pos;
    }

    /**
     *  Returns the next token in the stream.  This is the most called method in the entire parser, so it needs to be lean and mean.
     *  The position is advanced on every call, including the call that returns the end-of-stream marker.
     *
     *  @return The next token in the stream; or, if the stream is ended, -1.
     *  @throws IOException If something bad happens
     *  @throws NullPointerException If you have not yet created an input document.
     */
    protected final int nextToken() throws IOException, NullPointerException {
        // Deliberately no null check on m_in: this is the parser's hot path, and a missing input is reported as the documented NPE.
        m_pos++;
        return m_in.read();
    }

    /**
     *  Push back any character to the current input.  Does not push back a read EOF, though.
     *  Nothing happens either when no input has been set.
     *
     *  @param c Character to push back.
     *  @throws IOException In case the character cannot be pushed back.
     */
    protected void pushBack( final int c ) throws IOException {
        if( c != -1 && m_in != null ) {
            m_pos--;
            m_in.unread( c );
        }
    }

    /**
     *  Writes HTML for error message.  Does not add it to the document, you have to do it yourself.
     *
     *  @param error The error string.
     *  @return An Element containing the error: a {@code span} with class "error" wrapping the given text.
     */
    public static Element makeError( final String error ) {
        return new Element( "span" ).setAttribute( "class", "error" ).addContent( error );
    }

    /**
     *  Cleans a Wiki name.  The functionality of this method was changed in 2.6 so that the list of allowed characters is much larger.
     *  Use {@link #wikifyLink(String)} to get the legacy behaviour.
     *  <P>
     *  [ This is a link ] -&gt; This is a link
     *
     *  @param link Link to be cleared. Null is safe, and causes this to return null.
     *  @return A cleaned link.
     *
     *  @since 2.0
     */
    public static String cleanLink( final String link ) {
        return TextUtil.cleanString( link, TextUtil.PUNCTUATION_CHARS_ALLOWED );
    }

    /**
     *  Cleans away extra legacy characters.  This method functions exactly like pre-2.6 cleanLink()
     *  <P>
     *  [ This is a link ] -&gt; ThisIsALink
     *
     *  @param link Link to be cleared. Null is safe, and causes this to return null.
     *  @return A cleaned link.
     *  @since 2.6
     */
    public static String wikifyLink( final String link ) {
        return TextUtil.cleanString( link, TextUtil.LEGACY_CHARS_ALLOWED );
    }

}