/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements. See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership. The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied. See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.htmltowiki;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Pattern;


/**
 * Converting XHtml to Wiki Markup. This is the class which orchestrates all the heavy lifting: it walks the
 * JDOM tree and hands every node over to the configured {@link SyntaxDecorator}, which writes the wiki markup.
 */
public class XHtmlElementToWikiTranslator {

    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";

    /** CSS classes flagged as "ignored" on the decorator data; compiled once instead of on every element. */
    private static final Pattern IGNORED_CSS_CLASSES = Pattern.compile( "wikipage|createpage|external|interwiki|attachment|inline-code" );

    private final Engine e;
    private final XHtmlToWikiConfig config;
    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
    private final SyntaxDecorator syntax;

    /**
     * Create a new translator using the default config.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If thrown while instantiating the syntax decorator.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
        this( e, base, new XHtmlToWikiConfig() );
    }

    /**
     * Create a new translator using the specified config. The translation of {@code base} is performed
     * as part of construction; the result is available afterwards through {@link #getWikiString()}.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @param config The config to use.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If thrown while instantiating the syntax decorator.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
        this.e = e;
        this.config = config;
        syntax = getSyntaxDecorator();
        final PrintWriter out = new PrintWriter( outTrimmer );
        final Stack< String > liStack = new Stack<>();
        final Stack< String > preStack = new PreStack();

        syntax.init( out, liStack, preStack, outTrimmer, config, this );
        translate( base );
    }

    /**
     * Instantiates the syntax decorator named by the {@code jspwiki.syntax.decorator} wiki property, using
     * the default decorator when the property is missing or names a class that is not assignable to
     * {@link SyntaxDecorator}.
     *
     * @return the syntax decorator instance to use.
     * @throws ReflectiveOperationException If thrown while instantiating the decorator class.
     */
    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
        String sdClass = e.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
            sdClass = DEFAULT_SYNTAX_DECORATOR;
        }
        LOG.info( "Using {} as markup parser.", sdClass );
        return ClassUtil.buildInstance( sdClass );
    }

    /**
     * Outputs parsed wikitext.
     *
     * @return parsed wikitext.
     */
    public String getWikiString() {
        return outTrimmer.toString();
    }

    /**
     * Translates a single content node. Text nodes go to {@link #translateText(Text)}; elements are routed by
     * their {@code class} attribute: {@code imageplugin} to {@link #translateImage(Element)}, {@code wikiform}
     * to {@link #translateChildren(Element)} and everything else to {@link #translateParagraph(Element)}.
     * Any other kind of content is ignored.
     *
     * @param element the node to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translate( final Content element ) throws JDOMException {
        if( element instanceof Text ) {
            translateText( ( Text ) element );
        } else if( element instanceof Element ) {
            final Element base = ( Element )element;
            final String cssClass = base.getAttributeValue( "class" );
            if( "imageplugin".equals( cssClass ) ) {
                translateImage( base );
            } else if( "wikiform".equals( cssClass ) ) {
                // only print the children if the div's class="wikiform", but not the div itself.
                translateChildren( base );
            } else {
                translateParagraph( base );
            }
        }
    }

    /**
     * Hands a text node over to the syntax decorator.
     *
     * @param element the text node to translate.
     */
    public void translateText( final Text element ) {
        syntax.text( element );
    }

    /**
     * Translates an {@code imageplugin} element. The image is looked up under {@code TBODY/TR/TD/*} (falling
     * back to {@code base} itself), optionally wrapped in an {@code A} element providing the link target.
     * Nothing is emitted when no image element or no {@code src} can be determined.
     *
     * @param base the element carrying the image.
     */
    public void translateImage( final Element base ) {
        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
        if( child == null ) {
            child = base;
        }
        final Element img;
        final String href;
        if( child.getName().equals( "A" ) ) {
            img = child.getChild( "IMG" );
            href = child.getAttributeValue( "href" );
        } else {
            img = child;
            href = null;
        }
        if( img == null ) {
            return;
        }
        final String src = config.trimLink( img.getAttributeValue( "src" ) );
        if( src == null ) {
            return;
        }

        // insertion order is kept, so attributes reach the decorator in the order they are added here.
        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile( "CAPTION" ).evaluateFirst( base ) ) );
        putIfNotEmpty( imageAttrs, "link", href );
        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
        syntax.image( src, imageAttrs );
    }

    /** Adds the entry to the map unless the value is {@code null}. */
    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
        if( value != null ) {
            map.put( key, value );
        }
    }

    /** Returns the element's text, or {@code null} if the element is {@code null} or its text is only whitespace. */
    private String emptyToNull( final Element e ) {
        if( e == null ) {
            return null;
        }
        final String s = e.getText();
        return s == null ? null : ( s.replaceAll( "\\s", "" ).isEmpty() ? null : s );
    }

    /**
     * Collects the CSS properties of an element, lower-cased, into an insertion-ordered map. Besides the
     * {@code style} attribute, the {@code align} attribute of {@code p}/{@code div} and the {@code color},
     * {@code face} and {@code size} attributes of {@code font} are mapped to their CSS counterparts.
     *
     * @param base the element to inspect.
     * @return the properties found, or {@code null} if the element carries no style information at all.
     */
    private Map< Object, Object > getStylePropertiesLowerCase( final Element base ) {
        // Locale.ROOT: tag names must not be subject to locale specific case mappings (e.g. Turkish dotless i).
        final String n = base.getName().toLowerCase( Locale.ROOT );

        // "font-weight: bold; font-style: italic;"
        final String styleAttr = base.getAttributeValue( "style" );
        final StringBuilder style = new StringBuilder( styleAttr == null ? "" : styleAttr );

        if( n.equals( "p" ) || n.equals( "div" ) ) {
            final String align = base.getAttributeValue( "align" );
            // only add the value of the align attribute if the text-align style didn't already exist.
            if( align != null && style.indexOf( "text-align" ) < 0 ) {
                appendStyleProperty( style, "text-align", align );
            }
        }

        if( n.equals( "font" ) ) {
            final String color = base.getAttributeValue( "color" );
            final String face = base.getAttributeValue( "face" );
            final String size = base.getAttributeValue( "size" );
            if( color != null ) {
                appendStyleProperty( style, "color", color );
            }
            if( face != null ) {
                appendStyleProperty( style, "font-family", face );
            }
            if( size != null ) {
                final String fontSize = fontSizeToCss( size );
                if( fontSize != null ) {
                    appendStyleProperty( style, "font-size", fontSize );
                }
            }
        }

        if( style.length() == 0 ) {
            return null;
        }

        final Map< Object, Object > m = new LinkedHashMap<>();
        // Split each declaration at its first colon only, so values containing colons (e.g. urls) stay intact,
        // and skip blank or malformed declarations instead of failing on them.
        Arrays.stream( style.toString().toLowerCase( Locale.ROOT ).split( ";" ) )
              .map( prop -> prop.split( ":", 2 ) )
              .filter( kv -> kv.length == 2 && StringUtils.isNotBlank( kv[ 0 ] ) && StringUtils.isNotBlank( kv[ 1 ] ) )
              .forEach( kv -> m.put( kv[ 0 ].trim(), kv[ 1 ].trim() ) );
        return m;
    }

    /**
     * Appends {@code name:value;} to the style, preceded by a separator so the new declaration can never fuse
     * with a previous one that lacks its trailing semicolon.
     */
    private void appendStyleProperty( final StringBuilder style, final String name, final String value ) {
        style.append( ';' ).append( name ).append( ':' ).append( value ).append( ';' );
    }

    /** Maps the value of a {@code font} element's {@code size} attribute (1-7) to a CSS font size, {@code null} if unknown. */
    private String fontSizeToCss( final String size ) {
        switch( size ) {
            case "1": return "xx-small";
            case "2": return "x-small";
            case "3": return "small";
            case "4": return "medium";
            case "5": return "large";
            case "6": return "x-large";
            case "7": return "xx-large";
            default: return null;
        }
    }

    /** Serializes the given properties as {@code " key: value;"} declarations, in iteration order. */
    private String propsToStyleString( final Map< Object, Object > styleProps ) {
        final StringBuilder style = new StringBuilder();
        for( final Map.Entry< Object, Object > entry : styleProps.entrySet() ) {
            style.append( " " ).append( entry.getKey() ).append( ": " ).append( entry.getValue() ).append( ";" );
        }
        return style.toString();
    }

    /**
     * Translates every child of the given element, in document order. Child elements are dispatched to the
     * syntax decorator by their (lower-cased) tag name; unknown tags and non-element content are passed to
     * {@link #translate(Content)}.
     *
     * @param base the element whose children should be translated.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateChildren( final Element base ) throws JDOMException {
        for( final Content c : base.getContent() ) {
            if( c instanceof Element ) {
                final Element child = ( Element )c;
                // Locale.ROOT keeps e.g. "LI" -> "li" regardless of the JVM's default locale.
                final String n = child.getName().toLowerCase( Locale.ROOT );
                switch( n ) {
                    case "h1": syntax.h1( child ); break;
                    case "h2": syntax.h2( child ); break;
                    case "h3": syntax.h3( child ); break;
                    case "h4": syntax.h4( child ); break;
                    case "p": syntax.p( child ); break;
                    case "br": syntax.br( base, child ); break;
                    case "hr": syntax.hr( child ); break;
                    case "table": syntax.table( child ); break;
                    case "tr": syntax.tr( child ); break;
                    case "td": syntax.td( child ); break;
                    case "th": syntax.th( child ); break;
                    case "a": syntax.a( child ); break;
                    case "b":
                    case "strong": syntax.strong( child ); break;
                    case "i":
                    case "em":
                    case "address": syntax.em( child ); break;
                    case "u": syntax.underline( child ); break;
                    case "strike": syntax.strike( child ); break;
                    case "sub": syntax.sub( child ); break;
                    case "sup": syntax.sup( child ); break;
                    case "dl": syntax.dl( child ); break;
                    case "dt": syntax.dt( child ); break;
                    case "dd": syntax.dd( child ); break;
                    case "ul": syntax.ul( child ); break;
                    case "ol": syntax.ol( child ); break;
                    case "li": syntax.li( base, child ); break;
                    case "pre": syntax.pre( child ); break;
                    case "code":
                    case "tt": syntax.code( child ); break;
                    case "img": syntax.img( child ); break;
                    case "form": syntax.form( child ); break;
                    case "input": syntax.input( child ); break;
                    case "textarea": syntax.textarea( child ); break;
                    case "select": syntax.select( child ); break;
                    case "option": syntax.option( base, child ); break;
                    default: translate( child ); break;
                }
            } else {
                translate( c );
            }
        }
    }

    /**
     * Gathers the decoration data of the given element and hands it to the syntax decorator's paragraph handling.
     *
     * @param base the element to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateParagraph( final Element base ) throws JDOMException {
        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
        syntax.paragraph( dto );
    }

    /**
     * Derives the decoration state (bold, italic, monospace, remaining css) of an element from its
     * {@code class} attribute and its style properties.
     *
     * @param base the element to inspect.
     * @return the decoration data describing {@code base}.
     */
    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
        String n = base.getName().toLowerCase( Locale.ROOT );
        boolean bold = false;
        boolean italic = false;
        String cssSpecial = null;
        final String cssClass = base.getAttributeValue( "class" );

        // accommodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
        final boolean ignoredCssClass = cssClass != null && IGNORED_CSS_CLASSES.matcher( cssClass ).matches();

        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
        final Map< Object, Object > styleProps = n.equals( "a" ) ? null : getStylePropertiesLowerCase( base );

        final boolean monospace = "inline-code".equals( cssClass );

        if( styleProps != null ) {
            // weight and style are expressed through bold / italic, so take them out of the remaining css.
            final String weight = ( String ) styleProps.remove( "font-weight" );
            final String style = ( String ) styleProps.remove( "font-style" );

            if( n.equals( "p" ) ) {
                // change it, so we can print out the css styles for <p>
                n = "div";
            }

            italic = "oblique".equals( style ) || "italic".equals( style );
            bold = "bold".equals( weight ) || "bolder".equals( weight );
            if( !styleProps.isEmpty() ) {
                cssSpecial = propsToStyleString( styleProps );
            }
        }

        final ElementDecoratorData dto = new ElementDecoratorData();
        dto.base = base;
        dto.bold = bold;
        dto.cssClass = cssClass;
        dto.cssSpecial = cssSpecial;
        dto.htmlBase = n;
        dto.ignoredCssClass = ignoredCssClass;
        dto.italic = italic;
        dto.monospace = monospace;
        return dto;
    }

    /**
     * Stack which, after every push and pop, sets the whitespace trim mode of the output writer to
     * whether the stack is empty.
     */
    private class PreStack extends Stack< String > {

        @Override
        public String push( final String item ) {
            final String push = super.push( item );
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
            return push;
        }

        @Override
        public synchronized String pop() {
            final String pop = super.pop();
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
            return pop;
        }

    }

    /**
     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
     */
    public static class ElementDecoratorData {

        /** don't allow instantiation outside enclosing class. */
        private ElementDecoratorData() {}

        /** The element the data was built from. */
        public Element base;
        /** Lower-cased tag name of the element; {@code div} instead of {@code p} when style information was found. */
        public String htmlBase;
        /** Value of the element's {@code class} attribute, may be {@code null}. */
        public String cssClass;
        /** Style properties other than font-weight / font-style, serialized as css; {@code null} if there are none. */
        public String cssSpecial;
        /** {@code true} if the element's css class is {@code inline-code}. */
        public boolean monospace;
        /** {@code true} if the font-weight is {@code bold} or {@code bolder}. */
        public boolean bold;
        /** {@code true} if the font-style is {@code italic} or {@code oblique}. */
        public boolean italic;
        /** {@code true} if the css class is one of wikipage, createpage, external, interwiki, attachment or inline-code. */
        public boolean ignoredCssClass;
    }

}