/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.htmltowiki;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.htmltowiki.syntax.MarkupHelper;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;


/**
 * Converting XHtml to Wiki Markup. This is the class which orchestrates all the heavy lifting: it walks the
 * element tree and hands every node over to the configured {@link SyntaxDecorator}, which writes the wiki markup.
 */
public class XHtmlElementToWikiTranslator {

    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";

    /** CSS classes flagged as ignored on the decorator data; compiled once instead of on every element. */
    private static final Pattern IGNORED_CSS_CLASSES = Pattern.compile( "wikipage|createpage|external|interwiki|attachment|inline-code" );

    private final Engine engine;
    private final XHtmlToWikiConfig config;
    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
    private final SyntaxDecorator syntax;

    /**
     * Create a new translator using the default config.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If the syntax decorator cannot be instantiated.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
        this( e, base, new XHtmlToWikiConfig() );
    }

    /**
     * Create a new translator using the specified config. The translation of {@code base} is run as part of the
     * construction, so {@link #getWikiString()} can be called right afterwards.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @param config The config to use.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If the syntax decorator cannot be instantiated.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
        this.engine = e;
        this.config = config;
        syntax = getSyntaxDecorator();
        final PrintWriter out = new PrintWriter( outTrimmer );
        final Deque< String > liStack = new ArrayDeque<>();
        final Deque< String > preStack = new PreDeque();

        syntax.init( out, liStack, preStack, outTrimmer, config, this );
        translate( base );
    }

    /**
     * Instantiates the syntax decorator named by the {@code jspwiki.syntax.decorator} wiki property, falling back to
     * the default decorator when the property is absent or the configured class is not assignable to
     * {@link SyntaxDecorator}.
     *
     * @return the syntax decorator to use.
     * @throws ReflectiveOperationException If the decorator cannot be instantiated.
     */
    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
        String sdClass = engine.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
            sdClass = DEFAULT_SYNTAX_DECORATOR;
        }
        LOG.info( "Using {} as markup parser.", sdClass );
        return ClassUtil.buildInstance( sdClass );
    }

    /**
     * Outputs parsed wikitext.
     *
     * @return parsed wikitext.
     */
    public String getWikiString() {
        return outTrimmer.toString();
    }

    /**
     * Translates a single content node: text nodes go to {@link #translateText(Text)}, elements are dispatched on their
     * {@code class} attribute. Any other kind of content is ignored.
     *
     * @param element The node to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translate( final Content element ) throws JDOMException {
        if( element instanceof Text ) {
            translateText( ( Text )element );
        } else if( element instanceof Element ) {
            final Element base = ( Element )element;
            final String cssClass = base.getAttributeValue( "class" );
            if( "imageplugin".equals( cssClass ) ) {
                translateImage( base );
            } else if( "wikiform".equals( cssClass ) ) {
                // only print the children if the div's class="wikiform", but not the div itself.
                translateChildren( base );
            } else {
                translateParagraph( base );
            }
        }
    }

    /**
     * Hands a text node over to the syntax decorator.
     *
     * @param element The text node to translate.
     */
    public void translateText( final Text element ) {
        syntax.text( element );
    }

    /**
     * Translates an image, optionally wrapped in a link and / or in an image plugin table. Nothing is emitted when no
     * image element or no usable {@code src} is found.
     *
     * @param base The element holding the image.
     */
    public void translateImage( final Element base ) {
        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
        if( child == null ) {
            child = base;
        }
        final Element img;
        final String href;
        if( child.getName().equals( "A" ) ) {
            img = child.getChild( "IMG" );
            href = child.getAttributeValue( "href" );
        } else {
            img = child;
            href = null;
        }
        if( img == null ) {
            return;
        }
        final String src = config.trimLink( img.getAttributeValue( "src" ) );
        if( src == null ) {
            return;
        }

        // insertion order is kept, so attributes reach the decorator in the order they are added here.
        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile( "CAPTION" ).evaluateFirst( base ) ) );
        putIfNotEmpty( imageAttrs, "link", href );
        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
        syntax.image( src, imageAttrs );
    }

    /** Adds the entry to the map unless the value is {@code null}. */
    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
        if( value != null ) {
            map.put( key, value );
        }
    }

    /** Returns the text of the given element, or {@code null} if the element is missing or holds only whitespace. */
    private String emptyToNull( final Element element ) {
        if( element == null ) {
            return null;
        }
        final String text = element.getText();
        if( text == null || text.replaceAll( "\\s", "" ).isEmpty() ) {
            return null;
        }
        return text;
    }

    /**
     * Collects the CSS declarations that apply to the given element, lower-cased: the ones from its {@code style}
     * attribute plus the ones derived from presentational attributes ({@code align} on {@code p} / {@code div},
     * {@code color} / {@code face} / {@code size} on {@code font}).
     *
     * @param base The element to inspect.
     * @return the declarations in order of appearance, or {@code null} if the element carries no style information.
     */
    private Map< String, String > getStylePropertiesLowerCase( final Element base ) {
        final String n = base.getName().toLowerCase( Locale.ROOT );

        // "font-weight: bold; font-style: italic;"
        String style = base.getAttributeValue( "style" );
        if( style == null ) {
            style = "";
        }

        if( n.equals( "p" ) || n.equals( "div" ) ) {
            final String align = base.getAttributeValue( "align" );
            // only add the value of the align attribute if the text-align style didn't already exist.
            if( align != null && !style.toLowerCase( Locale.ROOT ).contains( "text-align" ) ) {
                style += ";text-align:" + align + ";";
            }
        }

        if( n.equals( "font" ) ) {
            final String color = base.getAttributeValue( "color" );
            final String face = base.getAttributeValue( "face" );
            final String size = base.getAttributeValue( "size" );
            // every addition starts with ';' so it never gets glued to an existing style lacking a trailing ';'.
            if( color != null ) {
                style += ";color:" + color + ";";
            }
            if( face != null ) {
                style += ";font-family:" + face + ";";
            }
            if( size != null ) {
                switch( size ) {
                    case "1": style += ";font-size:xx-small;"; break;
                    case "2": style += ";font-size:x-small;"; break;
                    case "3": style += ";font-size:small;"; break;
                    case "4": style += ";font-size:medium;"; break;
                    case "5": style += ";font-size:large;"; break;
                    case "6": style += ";font-size:x-large;"; break;
                    case "7": style += ";font-size:xx-large;"; break;
                    default: break; // any other size has no keyword equivalent and is dropped.
                }
            }
        }

        if( style.equals( "" ) ) {
            return null;
        }

        final Map< String, String > m = new LinkedHashMap<>();
        Arrays.stream( style.toLowerCase( Locale.ROOT ).split( ";" ) )
              .filter( StringUtils::isNotBlank )
              .forEach( declaration -> putStyleDeclaration( m, declaration ) );
        return m;
    }

    /**
     * Parses one {@code name: value} CSS declaration into the given map. Only the first colon separates name from
     * value, so values containing colons are kept whole; declarations lacking a colon, a name or a value are skipped.
     */
    private static void putStyleDeclaration( final Map< String, String > styleProps, final String declaration ) {
        final int colon = declaration.indexOf( ':' );
        if( colon < 0 ) {
            return;
        }
        final String name = declaration.substring( 0, colon ).trim();
        final String value = declaration.substring( colon + 1 ).trim();
        if( !name.isEmpty() && !value.isEmpty() ) {
            styleProps.put( name, value );
        }
    }

    /** Serializes the given declarations as {@code " name: value;"} fragments, in iteration order. */
    private String propsToStyleString( final Map< String, String > styleProps ) {
        final StringBuilder style = new StringBuilder();
        for( final Map.Entry< String, String > entry : styleProps.entrySet() ) {
            style.append( " " ).append( entry.getKey() ).append( ": " ).append( entry.getValue() ).append( ";" );
        }
        return style.toString();
    }

    /**
     * Translates every child of the given element, dispatching known HTML elements to the matching syntax decorator
     * operation and everything else to {@link #translate(Content)}.
     *
     * @param base The element whose children should be translated.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateChildren( final Element base ) throws JDOMException {
        for( final Content c : base.getContent() ) {
            if( c instanceof Element ) {
                final Element child = ( Element )c;
                // Locale.ROOT keeps the lookup stable whatever the default locale is (e.g. "LI" under a Turkish locale).
                final String n = child.getName().toLowerCase( Locale.ROOT );
                switch( n ) {
                    case "h1": syntax.h1( child ); break;
                    case "h2": syntax.h2( child ); break;
                    case "h3": syntax.h3( child ); break;
                    case "h4": syntax.h4( child ); break;
                    case "p": syntax.p( child ); break;
                    case "br": syntax.br( base, child ); break;
                    case "hr": syntax.hr( child ); break;
                    case "table": syntax.table( child ); break;
                    case "tbody": syntax.tbody( child ); break;
                    case "tr": syntax.tr( child ); break;
                    case "td": syntax.td( child ); break;
                    case "thead": syntax.thead( child ); break;
                    case "th": syntax.th( child ); break;
                    case "a": translateA( child ); break;
                    case "b":
                    case "strong": syntax.strong( child ); break;
                    case "i":
                    case "em":
                    case "address": syntax.em( child ); break;
                    case "u": syntax.underline( child ); break;
                    case "strike": syntax.strike( child ); break;
                    case "sub": syntax.sub( child ); break;
                    case "sup": syntax.sup( child ); break;
                    case "dl": syntax.dl( child ); break;
                    case "dt": syntax.dt( child ); break;
                    case "dd": syntax.dd( child ); break;
                    case "ul": syntax.ul( child ); break;
                    case "ol": syntax.ol( child ); break;
                    case "li": syntax.li( base, child ); break;
                    case "pre": syntax.pre( child ); break;
                    case "code":
                    case "tt": syntax.code( child ); break;
                    case "img": syntax.img( child ); break;
                    case "form": syntax.form( child ); break;
                    case "input": syntax.input( child ); break;
                    case "textarea": syntax.textarea( child ); break;
                    case "select": syntax.select( child ); break;
                    case "option": syntax.option( base, child ); break;
                    default: translate( child ); break;
                }
            } else {
                translate( c );
            }
        }
    }

    /**
     * Translates an anchor element: image links, undefined page links, footnote links and regular links are each
     * routed to their own handling. Links the config reports as ignorable are skipped.
     *
     * @param e The anchor element.
     * @throws JDOMException If the DOM tree is faulty.
     */
    void translateA( final Element e ) throws JDOMException {
        if( !config.isNotIgnorableWikiMarkupLink( e ) ) {
            return;
        }
        if( e.getChild( "IMG" ) != null ) {
            translateImage( e );
            return;
        }

        final String ref = config.trimLink( e.getAttributeValue( "href" ) );
        if( ref == null ) {
            if( MarkupHelper.isUndefinedPageLink( e ) ) {
                syntax.aUndefined( e );
            } else {
                translate( e );
            }
        } else if( MarkupHelper.isFootnoteLink( ref ) ) {
            final String href = ref.replaceFirst( "#ref-.+-(\\d+)", "$1" ); // convert "#ref-PageName-1" to just "1"
            final String label = e.getValue();
            // remove the brackets around "[1]"; anything shorter than two characters has no brackets to strip.
            final String textValue = label.length() > 1 ? label.substring( 1, label.length() - 1 ) : label;
            syntax.aFootnote( textValue, href );
        } else {
            syntax.a( e, ref );
        }
    }

    /**
     * Translates a generic element, passing its decoration data (bold, italic, monospace, extra css) on to the syntax
     * decorator.
     *
     * @param base The element to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateParagraph( final Element base ) throws JDOMException {
        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
        syntax.paragraph( dto );
    }

    /**
     * Gathers the decoration state of the given element from its name, {@code class} attribute and style information.
     *
     * @param base The element to inspect.
     * @return the decoration data describing the element.
     */
    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
        String n = base.getName().toLowerCase( Locale.ROOT );
        boolean bold = false;
        boolean italic = false;
        String cssSpecial = null;
        final String cssClass = base.getAttributeValue( "class" );

        // accommodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
        final boolean ignoredCssClass = cssClass != null && IGNORED_CSS_CLASSES.matcher( cssClass ).matches();

        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
        final Map< String, String > styleProps = n.equals( "a" ) ? null : getStylePropertiesLowerCase( base );

        final boolean monospace = "inline-code".equals( cssClass );

        if( styleProps != null ) {
            // weight and style are turned into bold / italic flags, so they are taken out of the remaining css.
            final String weight = styleProps.remove( "font-weight" );
            final String style = styleProps.remove( "font-style" );

            if( n.equals( "p" ) ) {
                // change it, so we can print out the css styles for <p>
                n = "div";
            }

            italic = "oblique".equals( style ) || "italic".equals( style );
            bold = "bold".equals( weight ) || "bolder".equals( weight );
            if( !styleProps.isEmpty() ) {
                cssSpecial = propsToStyleString( styleProps );
            }
        }

        final ElementDecoratorData dto = new ElementDecoratorData();
        dto.base = base;
        dto.bold = bold;
        dto.cssClass = cssClass;
        dto.cssSpecial = cssSpecial;
        dto.htmlBase = n;
        dto.ignoredCssClass = ignoredCssClass;
        dto.italic = italic;
        dto.monospace = monospace;
        return dto;
    }

    /**
     * Stack which switches the whitespace trimming of the output writer: trimming is on while the stack is empty and
     * off as soon as it holds an item. Only {@link #addFirst(String)} and {@link #removeFirst()} are tracked here; other
     * mutators are covered only as far as {@link ArrayDeque} routes them through these two methods.
     */
    private class PreDeque extends ArrayDeque< String > {

        private static final long serialVersionUID = 1L;

        @Override
        public void addFirst( final String item ) {
            super.addFirst( item );
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
        }

        @Override
        public String removeFirst() {
            final String pop = super.removeFirst();
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
            return pop;
        }

    }

    /**
     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
     */
    public static class ElementDecoratorData {

        /** don't allow instantiation outside enclosing class. */
        private ElementDecoratorData() {}

        /** The element being decorated. */
        public Element base;
        /** Lower-cased element name; {@code "div"} instead of {@code "p"} when the element carries style information. */
        public String htmlBase;
        /** Value of the element's {@code class} attribute, may be {@code null}. */
        public String cssClass;
        /** Remaining css declarations, once font-weight and font-style are taken out; {@code null} if there are none. */
        public String cssSpecial;
        /** {@code true} if the element's class is {@code inline-code}. */
        public boolean monospace;
        /** {@code true} if the font-weight is {@code bold} or {@code bolder}. */
        public boolean bold;
        /** {@code true} if the font-style is {@code italic} or {@code oblique}. */
        public boolean italic;
        /** {@code true} if the element's class is one of the classes flagged as ignored. */
        public boolean ignoredCssClass;
    }

}