001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Pattern;
038
039
040/**
041 * Converting XHtml to Wiki Markup.  This is the class which orchestrates all the heavy loading.
042 */
043public class XHtmlElementToWikiTranslator {
044
045    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
046    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";
047
048    private final Engine e;
049    private final XHtmlToWikiConfig config;
050    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
051    private final SyntaxDecorator syntax;
052
053    /**
054     *  Create a new translator using the default config.
055     *
056     *  @param base The base element from which to start translating.
057     *  @throws JDOMException If the DOM tree is faulty.
058     */
059    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
060        this( e, base, new XHtmlToWikiConfig() );
061    }
062
063    /**
064     *  Create a new translator using the specified config.
065     *
066     *  @param base The base element from which to start translating.
067     *  @param config The config to use.
068     *  @throws JDOMException If the DOM tree is faulty.
069     */
070    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
071        this.e = e;
072        this.config = config;
073        syntax = getSyntaxDecorator();
074        final PrintWriter out = new PrintWriter( outTrimmer );
075        final Stack< String > liStack = new Stack<>();
076        final Stack< String > preStack = new PreStack();
077
078        syntax.init( out, liStack, preStack, outTrimmer, config, this );
079        translate( base );
080    }
081
082    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
083        String sdClass = e.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
084        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
085            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
086            sdClass = DEFAULT_SYNTAX_DECORATOR;
087        }
088        LOG.info( "Using {} as markup parser.", sdClass );
089        return ClassUtil.buildInstance( sdClass );
090    }
091
092    /**
093     * Outputs parsed wikitext.
094     *
095     * @return parsed wikitext.
096     */
097    public String getWikiString() {
098        return outTrimmer.toString();
099    }
100
101    public void translate( final Content element ) throws JDOMException {
102        if( element instanceof Text ) {
103            translateText( ( Text ) element );
104        } else if( element instanceof Element ) {
105            final Element base = ( Element )element;
106            if( "imageplugin".equals( base.getAttributeValue( "class" ) ) ) {
107                translateImage( base );
108            } else if( "wikiform".equals( base.getAttributeValue( "class" ) ) ) {
109                // only print the children if the div's class="wikiform", but not the div itself.
110                translateChildren( base );
111            } else {
112                translateParagraph( base );
113            }
114        }
115    }
116
117    public void translateText( final Text element ) {
118        syntax.text( element );
119    }
120
121    public void translateImage( final Element base ) {
122        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
123        if( child == null ) {
124            child = base;
125        }
126        final Element img;
127        final String href;
128        if( child.getName().equals( "A" ) ) {
129            img = child.getChild( "IMG" );
130            href = child.getAttributeValue( "href" );
131        } else {
132            img = child;
133            href = null;
134        }
135        if( img == null ) {
136            return;
137        }
138        final String src = config.trimLink( img.getAttributeValue( "src" ) );
139        if( src == null ) {
140            return;
141        }
142
143        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
144        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
145        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
146        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
147        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
148        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile(  "CAPTION" ).evaluateFirst( base ) ) );
149        putIfNotEmpty( imageAttrs, "link", href );
150        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
151        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
152        syntax.image( src, imageAttrs );
153    }
154
155    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
156        if( value != null ) {
157            map.put( key, value );
158        }
159    }
160
161    private String emptyToNull( final Element e ) {
162        if( e == null ) {
163            return null;
164        }
165        final String s = e.getText();
166        return s == null ? null : ( s.replaceAll( "\\s", "" ).isEmpty() ? null : s );
167    }
168
169    private Map< Object, Object > getStylePropertiesLowerCase( final Element base ) {
170        final String n = base.getName().toLowerCase();
171
172        // "font-weight: bold; font-style: italic;"
173        String style = base.getAttributeValue( "style" );
174        if( style == null ) {
175            style = "";
176        }
177
178        if( n.equals( "p" ) || n.equals( "div" ) ) {
179            final String align = base.getAttributeValue( "align" );
180            if( align != null ) {
181                // only add the value of the align attribute if the text-align style didn't already exist.
182                if( !style.contains( "text-align" ) ) {
183                    style += ";text-align:" + align + ";";
184                }
185            }
186        }
187
188        if( n.equals( "font" ) ) {
189            final String color = base.getAttributeValue( "color" );
190            final String face = base.getAttributeValue( "face" );
191            final String size = base.getAttributeValue( "size" );
192            if( color != null ) {
193                style = style + "color:" + color + ";";
194            }
195            if( face != null ) {
196                style = style + "font-family:" + face + ";";
197            }
198            if( size != null ) {
199                switch ( size ) {
200                    case "1": style += "font-size:xx-small;"; break;
201                    case "2": style += "font-size:x-small;"; break;
202                    case "3": style += "font-size:small;"; break;
203                    case "4": style += "font-size:medium;"; break;
204                    case "5": style += "font-size:large;"; break;
205                    case "6": style += "font-size:x-large;"; break;
206                    case "7": style += "font-size:xx-large;"; break;
207                }
208            }
209        }
210
211        if( style.equals( "" ) ) {
212            return null;
213        }
214
215        final Map< Object, Object > m = new LinkedHashMap<>();
216        Arrays.stream( style.toLowerCase().split( ";" ) )
217              .filter( StringUtils::isNotEmpty )
218              .forEach( prop -> m.put( prop.split( ":" )[ 0 ].trim(), prop.split( ":" )[ 1 ].trim() ) );
219        return m;
220    }
221
222    private String propsToStyleString( final Map< Object, Object >  styleProps ) {
223        final StringBuilder style = new StringBuilder();
224        for( final Map.Entry< Object, Object > entry : styleProps.entrySet() ) {
225            style.append( " " ).append( entry.getKey() ).append( ": " ).append( entry.getValue() ).append( ";" );
226        }
227        return style.toString();
228    }
229
230    public void translateChildren( final Element base ) throws JDOMException {
231        for( final Content c : base.getContent() ) {
232            if( c instanceof Element ) {
233                final Element e = ( Element )c;
234                final String n = e.getName().toLowerCase();
235                switch( n ) {
236                    case "h1": syntax.h1( e ); break;
237                    case "h2": syntax.h2( e ); break;
238                    case "h3": syntax.h3( e ); break;
239                    case "h4": syntax.h4( e ); break;
240                    case "p": syntax.p( e ); break;
241                    case "br": syntax.br( base, e ); break;
242                    case "hr": syntax.hr( e ); break;
243                    case "table": syntax.table( e ); break;
244                    case "tr": syntax.tr( e ); break;
245                    case "td": syntax.td( e ); break;
246                    case "th": syntax.th( e ); break;
247                    case "a": syntax.a( e ); break;
248                    case "b":
249                    case "strong": syntax.strong( e ); break;
250                    case "i":
251                    case "em":
252                    case "address": syntax.em( e ); break;
253                    case "u": syntax.underline( e ); break;
254                    case "strike": syntax.strike( e ); break;
255                    case "sub": syntax.sub( e ); break;
256                    case "sup": syntax.sup( e ); break;
257                    case "dl": syntax.dl( e ); break;
258                    case "dt": syntax.dt( e ); break;
259                    case "dd": syntax.dd( e ); break;
260                    case "ul": syntax.ul( e ); break;
261                    case "ol": syntax.ol( e ); break;
262                    case "li": syntax.li( base, e ); break;
263                    case "pre": syntax.pre( e ); break;
264                    case "code":
265                    case "tt": syntax.code( e ); break;
266                    case "img": syntax.img( e ); break;
267                    case "form": syntax.form( e ); break;
268                    case "input": syntax.input( e ); break;
269                    case "textarea": syntax.textarea( e ); break;
270                    case "select": syntax.select( e ); break;
271                    case "option": syntax.option( base, e ); break;
272                    default: translate( e ); break;
273                }
274            } else {
275                translate( c );
276            }
277        }
278    }
279
280    public void translateParagraph( final Element base ) throws JDOMException {
281        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
282        syntax.paragraph( dto );
283    }
284
285    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
286        String n = base.getName().toLowerCase();
287        boolean bold = false;
288        boolean italic = false;
289        boolean monospace = false;
290        String cssSpecial = null;
291        final String cssClass = base.getAttributeValue( "class" );
292
293        // accomodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
294        final boolean ignoredCssClass = cssClass != null && cssClass.matches( "wikipage|createpage|external|interwiki|attachment|inline-code" );
295
296        Map< Object, Object > styleProps = null;
297
298        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
299        if( !n.equals( "a" ) ) {
300            styleProps = getStylePropertiesLowerCase( base );
301        }
302
303        if( "inline-code".equals( cssClass ) ) {
304            monospace = true;
305        }
306
307        if( styleProps != null ) {
308            final String weight = ( String ) styleProps.remove( "font-weight" );
309            final String style = ( String ) styleProps.remove( "font-style" );
310
311            if ( n.equals( "p" ) ) {
312                // change it, so we can print out the css styles for <p>
313                n = "div";
314            }
315
316            italic = "oblique".equals( style ) || "italic".equals( style );
317            bold = "bold".equals( weight ) || "bolder".equals( weight );
318            if ( !styleProps.isEmpty() ) {
319                cssSpecial = propsToStyleString( styleProps );
320            }
321        }
322
323        final ElementDecoratorData dto = new ElementDecoratorData();
324        dto.base = base;
325        dto.bold = bold;
326        dto.cssClass = cssClass;
327        dto.cssSpecial = cssSpecial;
328        dto.htmlBase = n;
329        dto.ignoredCssClass = ignoredCssClass;
330        dto.italic = italic;
331        dto.monospace = monospace;
332        return dto;
333    }
334
335    private class PreStack extends Stack< String > {
336
337        @Override
338        public String push( final String item ) {
339            final String push = super.push( item );
340            outTrimmer.setWhitespaceTrimMode( isEmpty() );
341            return push;
342        }
343
344        @Override
345        public synchronized String pop() {
346            final String pop = super.pop();
347            outTrimmer.setWhitespaceTrimMode( isEmpty() );
348            return pop;
349        }
350
351    }
352
353    /**
354     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
355     */
356    public static class ElementDecoratorData {
357
358        /** don't allow instantiation outside enclosing class. */
359        private ElementDecoratorData() {}
360
361        public Element base;
362        public String htmlBase;
363        public String cssClass;
364        public String cssSpecial;
365        public boolean monospace;
366        public boolean bold;
367        public boolean italic;
368        public boolean ignoredCssClass;
369    }
370
371}