/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.htmltowiki;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.htmltowiki.syntax.MarkupHelper;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Verifier;
import org.jdom2.Text;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;


/**
 * Converting XHtml to Wiki Markup. This is the class which orchestrates all the heavy lifting: it walks the JDOM tree and
 * delegates the rendering of every node to the configured {@link SyntaxDecorator}.
 */
public class XHtmlElementToWikiTranslator {

    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";

    /** CSS classes that are not emitted as wiki styles; compiled once instead of on every element. */
    private static final Pattern IGNORED_CSS_CLASSES = Pattern.compile( "wikipage|createpage|external|interwiki|attachment|inline-code" );

    private final Engine engine;
    private final XHtmlToWikiConfig config;
    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
    private final SyntaxDecorator syntax;

    /**
     * Create a new translator using the default config.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If the syntax decorator cannot be instantiated.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
        this( e, base, new XHtmlToWikiConfig() );
    }

    /**
     * Create a new translator using the specified config. The translation of {@code base} is performed eagerly by this
     * constructor; the result is available afterwards through {@link #getWikiString()}.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @param config The config to use.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If the syntax decorator cannot be instantiated.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
        this.engine = e;
        this.config = config;
        syntax = getSyntaxDecorator();
        final PrintWriter out = new PrintWriter( outTrimmer );
        final Deque< String > liStack = new ArrayDeque<>();
        final Deque< String > preStack = new PreDeque();

        syntax.init( out, liStack, preStack, outTrimmer, config, this );
        translate( base );
    }

    /**
     * Resolves the syntax decorator named by the {@code jspwiki.syntax.decorator} wiki property, falling back to the default
     * decorator when the property is absent or names a class that is not assignable to {@link SyntaxDecorator}.
     *
     * @return a new syntax decorator instance.
     * @throws ReflectiveOperationException If the decorator class cannot be instantiated.
     */
    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
        String sdClass = engine.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
            sdClass = DEFAULT_SYNTAX_DECORATOR;
        }
        LOG.debug( "Using {} as markup parser.", sdClass );
        return ClassUtil.buildInstance( sdClass );
    }

    /**
     * Outputs parsed wikitext.
     *
     * @return parsed wikitext.
     */
    public String getWikiString() {
        return outTrimmer.toString();
    }

    /**
     * Translates a single content node. Text nodes go to {@link #translateText(Text)}; elements are dispatched on their
     * {@code class} attribute. Elements with an invalid name and {@code script} elements are dropped, as is any other kind
     * of content.
     *
     * @param element The node to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translate( final Content element ) throws JDOMException {
        if( element instanceof Text ) {
            translateText( ( Text ) element );
        } else if( element instanceof Element ) {
            final Element base = ( Element ) element;
            // Locale.ROOT: with a default locale such as Turkish, "SCRIPT" would lower-case to "scrıpt" and slip past the filter below.
            final String n = base.getName().toLowerCase( Locale.ROOT );

            if( Verifier.checkElementName( n ) != null ) {
                return; /* invalid element name */
            }
            if( "script".equals( n ) ) {
                return; /* nono, not a good idea */
            }

            final String cssClass = base.getAttributeValue( "class" );
            if( "imageplugin".equals( cssClass ) ) {
                translateImage( base );
            } else if( "wikiform".equals( cssClass ) ) {
                // only print the children if the div's class="wikiform", but not the div itself.
                translateChildren( base );
            } else {
                translateParagraph( base );
            }
        }
    }

    /**
     * Hands a text node over to the syntax decorator.
     *
     * @param element The text node to translate.
     */
    public void translateText( final Text element ) {
        syntax.text( element );
    }

    /**
     * Translates an image, optionally wrapped in a link and/or an image plugin table, collecting its attributes and passing
     * them to the syntax decorator. Nothing is emitted when no image element or no {@code src} can be determined.
     *
     * @param base The element holding the image.
     */
    public void translateImage( final Element base ) {
        // NOTE(review): the element names below are matched in upper case only, unlike the case-insensitive dispatch used
        // elsewhere in this class - presumably the image plugin markup always arrives upper-cased; confirm against the parser.
        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
        if( child == null ) {
            child = base;
        }
        final Element img;
        final String href;
        if( child.getName().equals( "A" ) ) {
            img = child.getChild( "IMG" );
            href = child.getAttributeValue( "href" );
        } else {
            img = child;
            href = null;
        }
        if( img == null ) {
            return;
        }
        final String src = config.trimLink( img.getAttributeValue( "src" ) );
        if( src == null ) {
            return;
        }

        // insertion order is kept so the attributes are handed over in a stable order
        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile( "CAPTION" ).evaluateFirst( base ) ) );
        putIfNotEmpty( imageAttrs, "link", href );
        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
        syntax.image( src, imageAttrs );
    }

    /** Stores {@code value} under {@code key} unless it is {@code null}. */
    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
        if( value != null ) {
            map.put( key, value );
        }
    }

    /** Returns the text of {@code e}, or {@code null} if {@code e} is {@code null} or its text holds nothing but whitespace. */
    private String emptyToNull( final Element e ) {
        if( e == null ) {
            return null;
        }
        final String s = e.getText();
        if( s == null ) {
            return null;
        }
        return s.replaceAll( "\\s", "" ).isEmpty() ? null : s;
    }

    /**
     * Collects the CSS declarations of an element, lower-cased, merging in the legacy presentational attributes
     * ({@code align} on {@code p}/{@code div}; {@code color}, {@code face} and {@code size} on {@code font}).
     *
     * @param base The element to inspect.
     * @return the declarations keyed by property name in declaration order, or {@code null} if the element carries no style
     *         information at all.
     */
    private Map< Object, Object > getStylePropertiesLowerCase( final Element base ) {
        final String n = base.getName().toLowerCase( Locale.ROOT );

        // "font-weight: bold; font-style: italic;"
        String style = base.getAttributeValue( "style" );
        if( style == null ) {
            style = "";
        }

        if( n.equals( "p" ) || n.equals( "div" ) ) {
            final String align = base.getAttributeValue( "align" );
            // only add the value of the align attribute if the text-align style didn't already exist.
            if( align != null && !style.contains( "text-align" ) ) {
                style += ";text-align:" + align + ";";
            }
        }

        if( n.equals( "font" ) ) {
            final String color = base.getAttributeValue( "color" );
            final String face = base.getAttributeValue( "face" );
            final String size = base.getAttributeValue( "size" );
            // Every addition starts with ';' so it cannot fuse with a preceding declaration lacking its terminator
            // (e.g. style="font-weight: bold"); the resulting empty segments are skipped while parsing below.
            if( color != null ) {
                style += ";color:" + color + ";";
            }
            if( face != null ) {
                style += ";font-family:" + face + ";";
            }
            if( size != null ) {
                switch( size ) {
                    case "1": style += ";font-size:xx-small;"; break;
                    case "2": style += ";font-size:x-small;"; break;
                    case "3": style += ";font-size:small;"; break;
                    case "4": style += ";font-size:medium;"; break;
                    case "5": style += ";font-size:large;"; break;
                    case "6": style += ";font-size:x-large;"; break;
                    case "7": style += ";font-size:xx-large;"; break;
                }
            }
        }

        if( StringUtils.isEmpty( style ) ) {
            return null;
        }

        final Map< Object, Object > m = new LinkedHashMap<>();
        for( final String declaration : style.toLowerCase( Locale.ROOT ).split( ";" ) ) {
            // Split on the first colon only, so values which contain colons themselves (e.g. url(http://...)) stay intact.
            final int colon = declaration.indexOf( ':' );
            if( colon < 0 ) {
                continue; // blank or malformed segment such as a trailing "; " - nothing to record
            }
            final String property = declaration.substring( 0, colon ).trim();
            final String value = declaration.substring( colon + 1 ).trim();
            if( !property.isEmpty() && !value.isEmpty() ) {
                m.put( property, value );
            }
        }
        return m;
    }

    /** Serialises style properties back into a {@code " name: value;"} sequence, one entry per property. */
    private String propsToStyleString( final Map< Object, Object > styleProps ) {
        final StringBuilder style = new StringBuilder();
        for( final Map.Entry< Object, Object > entry : styleProps.entrySet() ) {
            style.append( " " ).append( entry.getKey() ).append( ": " ).append( entry.getValue() ).append( ";" );
        }
        return style.toString();
    }

    /**
     * Translates every child of the given element, routing known HTML elements to their dedicated syntax decorator method
     * and everything else through {@link #translate(Content)}.
     *
     * @param base The element whose children are to be translated.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateChildren( final Element base ) throws JDOMException {
        for( final Content c : base.getContent() ) {
            if( c instanceof Element ) {
                final Element e = ( Element )c;
                // Locale.ROOT keeps names containing 'I' (LI, IMG, INPUT, ...) matching their case labels under any default locale.
                final String n = e.getName().toLowerCase( Locale.ROOT );
                switch( n ) {
                    case "h1": syntax.h1( e ); break;
                    case "h2": syntax.h2( e ); break;
                    case "h3": syntax.h3( e ); break;
                    case "h4": syntax.h4( e ); break;
                    case "p": syntax.p( e ); break;
                    case "br": syntax.br( base, e ); break;
                    case "hr": syntax.hr( e ); break;
                    case "table": syntax.table( e ); break;
                    case "tbody": syntax.tbody( e ); break;
                    case "tr": syntax.tr( e ); break;
                    case "td": syntax.td( e ); break;
                    case "thead": syntax.thead( e ); break;
                    case "th": syntax.th( e ); break;
                    case "a": translateA( e ); break;
                    case "b":
                    case "strong": syntax.strong( e ); break;
                    case "i":
                    case "em":
                    case "address": syntax.em( e ); break;
                    case "u": syntax.underline( e ); break;
                    case "strike": syntax.strike( e ); break;
                    case "sub": syntax.sub( e ); break;
                    case "sup": syntax.sup( e ); break;
                    case "dl": syntax.dl( e ); break;
                    case "dt": syntax.dt( e ); break;
                    case "dd": syntax.dd( e ); break;
                    case "ul": syntax.ul( e ); break;
                    case "ol": syntax.ol( e ); break;
                    case "li": syntax.li( base, e ); break;
                    case "pre": syntax.pre( e ); break;
                    case "code":
                    case "tt": syntax.code( e ); break;
                    case "img": syntax.img( e ); break;
                    case "form": syntax.form( e ); break;
                    case "input": syntax.input( e ); break;
                    case "textarea": syntax.textarea( e ); break;
                    case "select": syntax.select( e ); break;
                    case "option": syntax.option( base, e ); break;
                    default: translate( e ); break;
                }
            } else {
                translate( c );
            }
        }
    }

    /**
     * Translates an anchor element: linked images, undefined page links, footnote links and plain links each take their own
     * route. Links the config flags as ignorable wiki markup links produce no output.
     *
     * @param e The anchor element.
     * @throws JDOMException If the DOM tree is faulty.
     */
    void translateA( final Element e ) throws JDOMException {
        if( !config.isNotIgnorableWikiMarkupLink( e ) ) {
            return;
        }
        if( e.getChild( "IMG" ) != null ) {
            translateImage( e );
            return;
        }

        final String ref = config.trimLink( e.getAttributeValue( "href" ) );
        if( ref == null ) {
            if( MarkupHelper.isUndefinedPageLink( e ) ) {
                syntax.aUndefined( e );
            } else {
                translate( e );
            }
        } else if( MarkupHelper.isFootnoteLink( ref ) ) {
            final String href = ref.replaceFirst( "#ref-.+-(\\d+)", "$1" ); // convert "#ref-PageName-1" to just "1"
            final String text = e.getValue();
            // remove the brackets around "[1]"; text too short to carry both brackets is passed through untouched
            // instead of failing with a StringIndexOutOfBoundsException.
            final String textValue = text.length() >= 2 ? text.substring( 1, text.length() - 1 ) : text;
            syntax.aFootnote( textValue, href );
        } else {
            syntax.a( e, ref );
        }
    }

    /**
     * Translates an element by computing its decoration data and passing it to the syntax decorator.
     *
     * @param base The element to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateParagraph( final Element base ) throws JDOMException {
        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
        syntax.paragraph( dto );
    }

    /**
     * Derives the decoration state (bold, italic, monospace, remaining css) of an element from its {@code class} and
     * {@code style} information.
     *
     * @param base The element to inspect.
     * @return the decoration data describing {@code base}.
     */
    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
        String n = base.getName().toLowerCase( Locale.ROOT );
        boolean bold = false;
        boolean italic = false;
        String cssSpecial = null;
        final String cssClass = base.getAttributeValue( "class" );

        // accommodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
        final boolean ignoredCssClass = cssClass != null && IGNORED_CSS_CLASSES.matcher( cssClass ).matches();

        final boolean monospace = "inline-code".equals( cssClass );

        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
        final Map< Object, Object > styleProps = n.equals( "a" ) ? null : getStylePropertiesLowerCase( base );

        if( styleProps != null ) {
            // weight and style are rendered through wiki markup, so they are taken out of the remaining css
            final String weight = ( String ) styleProps.remove( "font-weight" );
            final String style = ( String ) styleProps.remove( "font-style" );

            if( n.equals( "p" ) ) {
                // change it, so we can print out the css styles for <p>
                n = "div";
            }

            italic = "oblique".equals( style ) || "italic".equals( style );
            bold = "bold".equals( weight ) || "bolder".equals( weight );
            if( !styleProps.isEmpty() ) {
                cssSpecial = propsToStyleString( styleProps );
            }
        }

        final ElementDecoratorData dto = new ElementDecoratorData();
        dto.base = base;
        dto.bold = bold;
        dto.cssClass = cssClass;
        dto.cssSpecial = cssSpecial;
        dto.htmlBase = n;
        dto.ignoredCssClass = ignoredCssClass;
        dto.italic = italic;
        dto.monospace = monospace;
        return dto;
    }

    /**
     * Deque which, after every {@code addFirst} / {@code removeFirst}, sets the whitespace trim mode of the output writer
     * to whether the deque is empty at that point.
     */
    private class PreDeque extends ArrayDeque< String > {

        @Override
        public void addFirst( final String item ) {
            super.addFirst( item );
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
        }

        @Override
        public String removeFirst() {
            final String pop = super.removeFirst();
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
            return pop;
        }

    }

    /**
     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
     */
    public static class ElementDecoratorData {

        /** don't allow instantiation outside enclosing class. */
        private ElementDecoratorData() {}

        public Element base;
        public String htmlBase;
        public String cssClass;
        public String cssSpecial;
        public boolean monospace;
        public boolean bold;
        public boolean italic;
        public boolean ignoredCssClass;
    }

}