/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.htmltowiki;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.htmltowiki.syntax.MarkupHelper;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Verifier;
import org.jdom2.Text;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;


/**
 * Converting XHtml to Wiki Markup. This is the class which orchestrates all the heavy lifting: it walks the
 * JDOM tree and delegates the actual markup generation to the configured {@link SyntaxDecorator}.
 */
public class XHtmlElementToWikiTranslator {

    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";

    private final Engine e;
    private final XHtmlToWikiConfig config;
    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
    private final SyntaxDecorator syntax;

    /**
     * Create a new translator using the default config.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If the syntax decorator cannot be instantiated.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
        this( e, base, new XHtmlToWikiConfig() );
    }

    /**
     * Create a new translator using the specified config. The translation of {@code base} is performed
     * as part of construction; the result is available through {@link #getWikiString()}.
     *
     * @param e The engine whose wiki properties select the syntax decorator.
     * @param base The base element from which to start translating.
     * @param config The config to use.
     * @throws JDOMException If the DOM tree is faulty.
     * @throws ReflectiveOperationException If the syntax decorator cannot be instantiated.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
        this.e = e;
        this.config = config;
        syntax = getSyntaxDecorator();
        final PrintWriter out = new PrintWriter( outTrimmer );
        final Deque< String > liStack = new ArrayDeque<>();
        final Deque< String > preStack = new PreDeque();

        syntax.init( out, liStack, preStack, outTrimmer, config, this );
        translate( base );
    }

    /**
     * Resolves and instantiates the syntax decorator named by the {@code jspwiki.syntax.decorator} wiki property,
     * falling back to the default decorator when the configured class is not assignable to {@link SyntaxDecorator}.
     *
     * @return a new syntax decorator instance.
     * @throws ReflectiveOperationException If the decorator class cannot be instantiated.
     */
    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
        String sdClass = e.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
            sdClass = DEFAULT_SYNTAX_DECORATOR;
        }
        LOG.debug( "Using {} as markup parser.", sdClass );
        return ClassUtil.buildInstance( sdClass );
    }

    /**
     * Outputs parsed wikitext.
     *
     * @return parsed wikitext.
     */
    public String getWikiString() {
        return outTrimmer.toString();
    }

    /**
     * Translates a single content node. Text nodes go to {@link #translateText(Text)}; elements are dispatched on
     * their {@code class} attribute. Elements with an invalid name and {@code script} elements are skipped, as is
     * any content that is neither {@link Text} nor {@link Element}.
     *
     * @param element The content node to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translate( final Content element ) throws JDOMException {
        if( element instanceof Text ) {
            translateText( ( Text ) element );
        } else if( element instanceof Element ) {
            final Element base = ( Element )element;
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale (e.g. Turkish dotless i).
            final String n = base.getName().toLowerCase( Locale.ROOT );

            if( Verifier.checkElementName( n ) != null ) {
                return; /* invalid element name */
            }
            if( "script".equals( n ) ) {
                return; /* nono, not a good idea*/
            }

            final String cssClass = base.getAttributeValue( "class" );
            if( "imageplugin".equals( cssClass ) ) {
                translateImage( base );
            } else if( "wikiform".equals( cssClass ) ) {
                // only print the children if the div's class="wikiform", but not the div itself.
                translateChildren( base );
            } else {
                translateParagraph( base );
            }
        }
    }

    /**
     * Hands a text node over to the syntax decorator.
     *
     * @param element The text node to translate.
     */
    public void translateText( final Text element ) {
        syntax.text( element );
    }

    /**
     * Translates an image structure. The image is looked up at {@code TBODY/TR/TD/*} below {@code base}, falling
     * back to {@code base} itself; when that node is an {@code A} element its {@code IMG} child is used and the
     * {@code href} becomes the image link. Nothing is emitted when there is no image or no usable {@code src}.
     *
     * @param base The element holding the image.
     */
    public void translateImage( final Element base ) {
        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
        if( child == null ) {
            child = base;
        }
        final Element img;
        final String href;
        if( child.getName().equals( "A" ) ) {
            img = child.getChild( "IMG" );
            href = child.getAttributeValue( "href" );
        } else {
            img = child;
            href = null;
        }
        if( img == null ) {
            return;
        }
        final String src = config.trimLink( img.getAttributeValue( "src" ) );
        if( src == null ) {
            return;
        }

        // LinkedHashMap: the attributes are handed to the decorator in this insertion order.
        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile( "CAPTION" ).evaluateFirst( base ) ) );
        putIfNotEmpty( imageAttrs, "link", href );
        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
        syntax.image( src, imageAttrs );
    }

    /** Adds the entry to the map unless the value is {@code null}. */
    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
        if( value != null ) {
            map.put( key, value );
        }
    }

    /** Returns the element's text, or {@code null} when the element is {@code null} or its text is only whitespace. */
    private String emptyToNull( final Element element ) {
        if( element == null ) {
            return null;
        }
        final String s = element.getText();
        return s == null ? null : ( s.replaceAll( "\\s", "" ).isEmpty() ? null : s );
    }

    /**
     * Collects the css properties of an element as a lower-cased, insertion-ordered map. Besides the {@code style}
     * attribute, the {@code align} attribute of {@code p}/{@code div} elements and the {@code color}, {@code face}
     * and {@code size} attributes of {@code font} elements are folded in as css declarations.
     *
     * @param base The element to inspect.
     * @return the css properties, or {@code null} if the element carries no style information at all.
     */
    private Map< Object, Object > getStylePropertiesLowerCase( final Element base ) {
        final String n = base.getName().toLowerCase( Locale.ROOT );

        // "font-weight: bold; font-style: italic;"
        String style = base.getAttributeValue( "style" );
        if( style == null ) {
            style = "";
        }

        if( n.equals( "p" ) || n.equals( "div" ) ) {
            final String align = base.getAttributeValue( "align" );
            // only add the value of the align attribute if the text-align style didn't already exist.
            if( align != null && !style.contains( "text-align" ) ) {
                style += ";text-align:" + align + ";";
            }
        }

        if( n.equals( "font" ) ) {
            final String color = base.getAttributeValue( "color" );
            final String face = base.getAttributeValue( "face" );
            final String size = base.getAttributeValue( "size" );
            // The leading ';' keeps each declaration separate from an existing style lacking a trailing semicolon.
            if( color != null ) {
                style += ";color:" + color + ";";
            }
            if( face != null ) {
                style += ";font-family:" + face + ";";
            }
            if( size != null ) {
                switch ( size ) {
                    case "1": style += ";font-size:xx-small;"; break;
                    case "2": style += ";font-size:x-small;"; break;
                    case "3": style += ";font-size:small;"; break;
                    case "4": style += ";font-size:medium;"; break;
                    case "5": style += ";font-size:large;"; break;
                    case "6": style += ";font-size:x-large;"; break;
                    case "7": style += ";font-size:xx-large;"; break;
                }
            }
        }

        if( style.equals( "" ) ) {
            return null;
        }

        final Map< Object, Object > m = new LinkedHashMap<>();
        Arrays.stream( style.toLowerCase( Locale.ROOT ).split( ";" ) )
              .filter( StringUtils::isNotBlank )
              .forEach( declaration -> putStyleDeclaration( m, declaration ) );
        return m;
    }

    /**
     * Parses one {@code name: value} css declaration into the map. Declarations without a colon, or with an empty
     * name or value, are skipped; only the first colon separates name from value, so values may contain colons.
     */
    private static void putStyleDeclaration( final Map< Object, Object > target, final String declaration ) {
        final String[] parts = declaration.split( ":", 2 );
        if( parts.length < 2 ) {
            return;
        }
        final String name = parts[ 0 ].trim();
        final String value = parts[ 1 ].trim();
        if( !name.isEmpty() && !value.isEmpty() ) {
            target.put( name, value );
        }
    }

    /** Serializes the properties as {@code " key: value;"} fragments, concatenated in map iteration order. */
    private String propsToStyleString( final Map< Object, Object > styleProps ) {
        return styleProps.entrySet()
                         .stream()
                         .map( entry -> " " + entry.getKey() + ": " + entry.getValue() + ";" )
                         .collect( Collectors.joining() );
    }

    /**
     * Translates every child of the given element. Known html elements are dispatched to the matching
     * {@link SyntaxDecorator} callback; everything else goes through {@link #translate(Content)}.
     *
     * @param base The element whose content is translated.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateChildren( final Element base ) throws JDOMException {
        for( final Content c : base.getContent() ) {
            if( c instanceof Element ) {
                final Element child = ( Element )c;
                final String n = child.getName().toLowerCase( Locale.ROOT );
                switch( n ) {
                    case "h1": syntax.h1( child ); break;
                    case "h2": syntax.h2( child ); break;
                    case "h3": syntax.h3( child ); break;
                    case "h4": syntax.h4( child ); break;
                    case "p": syntax.p( child ); break;
                    case "br": syntax.br( base, child ); break;
                    case "hr": syntax.hr( child ); break;
                    case "table": syntax.table( child ); break;
                    case "tbody": syntax.tbody( child ); break;
                    case "tr": syntax.tr( child ); break;
                    case "td": syntax.td( child ); break;
                    case "thead": syntax.thead( child ); break;
                    case "th": syntax.th( child ); break;
                    case "a": translateA( child ); break;
                    case "b":
                    case "strong": syntax.strong( child ); break;
                    case "i":
                    case "em":
                    case "address": syntax.em( child ); break;
                    case "u": syntax.underline( child ); break;
                    case "strike": syntax.strike( child ); break;
                    case "sub": syntax.sub( child ); break;
                    case "sup": syntax.sup( child ); break;
                    case "dl": syntax.dl( child ); break;
                    case "dt": syntax.dt( child ); break;
                    case "dd": syntax.dd( child ); break;
                    case "ul": syntax.ul( child ); break;
                    case "ol": syntax.ol( child ); break;
                    case "li": syntax.li( base, child ); break;
                    case "pre": syntax.pre( child ); break;
                    case "code":
                    case "tt": syntax.code( child ); break;
                    case "img": syntax.img( child ); break;
                    case "form": syntax.form( child ); break;
                    case "input": syntax.input( child ); break;
                    case "textarea": syntax.textarea( child ); break;
                    case "select": syntax.select( child ); break;
                    case "option": syntax.option( base, child ); break;
                    default: translate( child ); break;
                }
            } else {
                translate( c );
            }
        }
    }

    /**
     * Translates an anchor element, unless the config marks it as an ignorable wiki markup link. Anchors wrapping
     * an {@code IMG} are handled as images; anchors without a usable href are either undefined-page links or are
     * translated like any other element; footnote references and regular links go to the syntax decorator.
     *
     * @param e The anchor element.
     * @throws JDOMException If the DOM tree is faulty.
     */
    void translateA( final Element e ) throws JDOMException {
        if( !config.isNotIgnorableWikiMarkupLink( e ) ) {
            return;
        }
        if( e.getChild( "IMG" ) != null ) {
            translateImage( e );
            return;
        }

        final String ref = config.trimLink( e.getAttributeValue( "href" ) );
        if( ref == null ) {
            if( MarkupHelper.isUndefinedPageLink( e ) ) {
                syntax.aUndefined( e );
            } else {
                translate( e );
            }
        } else if( MarkupHelper.isFootnoteLink( ref ) ) {
            final String href = ref.replaceFirst( "#ref-.+-(\\d+)", "$1" ); // convert "#ref-PageName-1" to just "1"
            final String linkText = e.getValue();
            // remove the brackets around "[1]"; text too short to carry both brackets is passed through unchanged
            final String textValue = linkText.length() >= 2 ? linkText.substring( 1, linkText.length() - 1 ) : linkText;
            syntax.aFootnote( textValue, href );
        } else {
            syntax.a( e, ref );
        }
    }

    /**
     * Translates a generic element by building its decoration data and passing it to the syntax decorator.
     *
     * @param base The element to translate.
     * @throws JDOMException If the DOM tree is faulty.
     */
    public void translateParagraph( final Element base ) throws JDOMException {
        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
        syntax.paragraph( dto );
    }

    /**
     * Derives the decoration state (bold, italic, monospace, css class, remaining css properties) of an element.
     *
     * @param base The element to inspect.
     * @return the decoration data for the element.
     */
    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
        String n = base.getName().toLowerCase( Locale.ROOT );
        boolean bold = false;
        boolean italic = false;
        String cssSpecial = null;
        final String cssClass = base.getAttributeValue( "class" );

        // accommodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
        final boolean ignoredCssClass = cssClass != null && cssClass.matches( "wikipage|createpage|external|interwiki|attachment|inline-code" );

        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
        final Map< Object, Object > styleProps = n.equals( "a" ) ? null : getStylePropertiesLowerCase( base );

        final boolean monospace = "inline-code".equals( cssClass );

        if( styleProps != null ) {
            // font-weight / font-style are expressed through wiki markup, so they are taken out of the css leftovers.
            final String weight = ( String ) styleProps.remove( "font-weight" );
            final String style = ( String ) styleProps.remove( "font-style" );

            if ( n.equals( "p" ) ) {
                // change it, so we can print out the css styles for <p>
                n = "div";
            }

            italic = "oblique".equals( style ) || "italic".equals( style );
            bold = "bold".equals( weight ) || "bolder".equals( weight );
            if ( !styleProps.isEmpty() ) {
                cssSpecial = propsToStyleString( styleProps );
            }
        }

        final ElementDecoratorData dto = new ElementDecoratorData();
        dto.base = base;
        dto.bold = bold;
        dto.cssClass = cssClass;
        dto.cssSpecial = cssSpecial;
        dto.htmlBase = n;
        dto.ignoredCssClass = ignoredCssClass;
        dto.italic = italic;
        dto.monospace = monospace;
        return dto;
    }

    /**
     * Deque which, after every {@code addFirst} / {@code removeFirst}, sets the whitespace trim mode of the
     * enclosing translator's writer to whether the deque is currently empty.
     */
    private class PreDeque extends ArrayDeque< String > {

        private static final long serialVersionUID = 2401529529970187649L;

        @Override
        public void addFirst( final String item ) {
            super.addFirst( item );
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
        }

        @Override
        public String removeFirst() {
            final String pop = super.removeFirst();
            outTrimmer.setWhitespaceTrimMode( isEmpty() );
            return pop;
        }

    }

    /**
     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
     */
    public static class ElementDecoratorData {

        /** don't allow instantiation outside enclosing class. */
        private ElementDecoratorData() {}

        public Element base;
        public String htmlBase;
        public String cssClass;
        public String cssSpecial;
        public boolean monospace;
        public boolean bold;
        public boolean italic;
        public boolean ignoredCssClass;
    }

}