001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.htmltowiki.syntax.MarkupHelper;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
040
041
/**
 * Converts XHTML to wiki markup.  This is the class which orchestrates all the heavy lifting.
 */
045public class XHtmlElementToWikiTranslator {
046
    /** Logger used by this translator. */
    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
    /** Class name of the syntax decorator used when none is configured, or when the configured one is not a {@link SyntaxDecorator}. */
    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";

    /** Engine whose wiki properties are read to select the syntax decorator. */
    private final Engine e;
    /** Conversion settings; used in this class to trim links and to filter out ignorable links. */
    private final XHtmlToWikiConfig config;
    /** Writer receiving the generated markup; its contents are what {@link #getWikiString()} returns. */
    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
    /** Decorator that every element-specific translation is delegated to. */
    private final SyntaxDecorator syntax;
054
    /**
     *  Create a new translator using the default config. Delegates to the three-argument constructor with a
     *  freshly created {@link XHtmlToWikiConfig}.
     *
     *  @param e The engine whose wiki properties are used to select the syntax decorator.
     *  @param base The base element from which to start translating.
     *  @throws JDOMException If the DOM tree is faulty.
     *  @throws ReflectiveOperationException If obtaining the syntax decorator fails.
     */
    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
        this( e, base, new XHtmlToWikiConfig() );
    }
064
065    /**
066     *  Create a new translator using the specified config.
067     *
068     *  @param base The base element from which to start translating.
069     *  @param config The config to use.
070     *  @throws JDOMException If the DOM tree is faulty.
071     */
072    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
073        this.e = e;
074        this.config = config;
075        syntax = getSyntaxDecorator();
076        final PrintWriter out = new PrintWriter( outTrimmer );
077        final Deque< String > liStack = new ArrayDeque<>();
078        final Deque< String > preStack = new PreDeque();
079
080        syntax.init( out, liStack, preStack, outTrimmer, config, this );
081        translate( base );
082    }
083
084    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
085        String sdClass = e.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
086        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
087            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
088            sdClass = DEFAULT_SYNTAX_DECORATOR;
089        }
090        LOG.info( "Using {} as markup parser.", sdClass );
091        return ClassUtil.buildInstance( sdClass );
092    }
093
094    /**
095     * Outputs parsed wikitext.
096     *
097     * @return parsed wikitext.
098     */
099    public String getWikiString() {
100        return outTrimmer.toString();
101    }
102
103    public void translate( final Content element ) throws JDOMException {
104        if( element instanceof Text ) {
105            translateText( ( Text ) element );
106        } else if( element instanceof Element ) {
107            final Element base = ( Element )element;
108            if( "imageplugin".equals( base.getAttributeValue( "class" ) ) ) {
109                translateImage( base );
110            } else if( "wikiform".equals( base.getAttributeValue( "class" ) ) ) {
111                // only print the children if the div's class="wikiform", but not the div itself.
112                translateChildren( base );
113            } else {
114                translateParagraph( base );
115            }
116        }
117    }
118
119    public void translateText( final Text element ) {
120        syntax.text( element );
121    }
122
123    public void translateImage( final Element base ) {
124        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
125        if( child == null ) {
126            child = base;
127        }
128        final Element img;
129        final String href;
130        if( child.getName().equals( "A" ) ) {
131            img = child.getChild( "IMG" );
132            href = child.getAttributeValue( "href" );
133        } else {
134            img = child;
135            href = null;
136        }
137        if( img == null ) {
138            return;
139        }
140        final String src = config.trimLink( img.getAttributeValue( "src" ) );
141        if( src == null ) {
142            return;
143        }
144
145        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
146        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
147        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
148        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
149        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
150        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile(  "CAPTION" ).evaluateFirst( base ) ) );
151        putIfNotEmpty( imageAttrs, "link", href );
152        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
153        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
154        syntax.image( src, imageAttrs );
155    }
156
157    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
158        if( value != null ) {
159            map.put( key, value );
160        }
161    }
162
163    private String emptyToNull( final Element e ) {
164        if( e == null ) {
165            return null;
166        }
167        final String s = e.getText();
168        return s == null ? null : ( s.replaceAll( "\\s", "" ).isEmpty() ? null : s );
169    }
170
171    private Map< Object, Object > getStylePropertiesLowerCase( final Element base ) {
172        final String n = base.getName().toLowerCase();
173
174        // "font-weight: bold; font-style: italic;"
175        String style = base.getAttributeValue( "style" );
176        if( style == null ) {
177            style = "";
178        }
179
180        if( n.equals( "p" ) || n.equals( "div" ) ) {
181            final String align = base.getAttributeValue( "align" );
182            if( align != null ) {
183                // only add the value of the align attribute if the text-align style didn't already exist.
184                if( !style.contains( "text-align" ) ) {
185                    style += ";text-align:" + align + ";";
186                }
187            }
188        }
189
190        if( n.equals( "font" ) ) {
191            final String color = base.getAttributeValue( "color" );
192            final String face = base.getAttributeValue( "face" );
193            final String size = base.getAttributeValue( "size" );
194            if( color != null ) {
195                style = style + "color:" + color + ";";
196            }
197            if( face != null ) {
198                style = style + "font-family:" + face + ";";
199            }
200            if( size != null ) {
201                switch ( size ) {
202                    case "1": style += "font-size:xx-small;"; break;
203                    case "2": style += "font-size:x-small;"; break;
204                    case "3": style += "font-size:small;"; break;
205                    case "4": style += "font-size:medium;"; break;
206                    case "5": style += "font-size:large;"; break;
207                    case "6": style += "font-size:x-large;"; break;
208                    case "7": style += "font-size:xx-large;"; break;
209                }
210            }
211        }
212
213        if( style.equals( "" ) ) {
214            return null;
215        }
216
217        final Map< Object, Object > m = new LinkedHashMap<>();
218        Arrays.stream( style.toLowerCase().split( ";" ) )
219              .filter( StringUtils::isNotEmpty )
220              .forEach( prop -> m.put( prop.split( ":" )[ 0 ].trim(), prop.split( ":" )[ 1 ].trim() ) );
221        return m;
222    }
223
224    private String propsToStyleString( final Map< Object, Object >  styleProps ) {
225        final StringBuilder style = new StringBuilder();
226        for( final Map.Entry< Object, Object > entry : styleProps.entrySet() ) {
227            style.append( " " ).append( entry.getKey() ).append( ": " ).append( entry.getValue() ).append( ";" );
228        }
229        return style.toString();
230    }
231
232    public void translateChildren( final Element base ) throws JDOMException {
233        for( final Content c : base.getContent() ) {
234            if( c instanceof Element ) {
235                final Element e = ( Element )c;
236                final String n = e.getName().toLowerCase();
237                switch( n ) {
238                    case "h1": syntax.h1( e ); break;
239                    case "h2": syntax.h2( e ); break;
240                    case "h3": syntax.h3( e ); break;
241                    case "h4": syntax.h4( e ); break;
242                    case "p": syntax.p( e ); break;
243                    case "br": syntax.br( base, e ); break;
244                    case "hr": syntax.hr( e ); break;
245                    case "table": syntax.table( e ); break;
246                    case "tbody": syntax.tbody( e ); break;
247                    case "tr": syntax.tr( e ); break;
248                    case "td": syntax.td( e ); break;
249                    case "thead": syntax.thead( e ); break;
250                    case "th": syntax.th( e ); break;
251                    case "a": translateA( e ); break;
252                    case "b":
253                    case "strong": syntax.strong( e ); break;
254                    case "i":
255                    case "em":
256                    case "address": syntax.em( e ); break;
257                    case "u": syntax.underline( e ); break;
258                    case "strike": syntax.strike( e ); break;
259                    case "sub": syntax.sub( e ); break;
260                    case "sup": syntax.sup( e ); break;
261                    case "dl": syntax.dl( e ); break;
262                    case "dt": syntax.dt( e ); break;
263                    case "dd": syntax.dd( e ); break;
264                    case "ul": syntax.ul( e ); break;
265                    case "ol": syntax.ol( e ); break;
266                    case "li": syntax.li( base, e ); break;
267                    case "pre": syntax.pre( e ); break;
268                    case "code":
269                    case "tt": syntax.code( e ); break;
270                    case "img": syntax.img( e ); break;
271                    case "form": syntax.form( e ); break;
272                    case "input": syntax.input( e ); break;
273                    case "textarea": syntax.textarea( e ); break;
274                    case "select": syntax.select( e ); break;
275                    case "option": syntax.option( base, e ); break;
276                    default: translate( e ); break;
277                }
278            } else {
279                translate( c );
280            }
281        }
282    }
283
284    void translateA( final Element e ) throws JDOMException {
285        if( config.isNotIgnorableWikiMarkupLink( e ) ) {
286            if( e.getChild( "IMG" ) != null ) {
287                translateImage( e );
288            } else {
289                final String ref = config.trimLink( e.getAttributeValue( "href" ) );
290                if( ref == null ) {
291                    if( MarkupHelper.isUndefinedPageLink( e ) ) {
292                        syntax.aUndefined( e );
293                    } else {
294                        translate( e );
295                    }
296                } else if( MarkupHelper.isFootnoteLink( ref ) ) {
297                    final String href = ref.replaceFirst( "#ref-.+-(\\d+)", "$1" ); // convert "#ref-PageName-1" to just "1"
298                    final String textValue = e.getValue().substring( 1, ( e.getValue().length() - 1 ) ); // remove the brackets around "[1]"
299                    syntax.aFootnote( textValue, href );
300                } else {
301                    syntax.a( e, ref );
302                }
303            }
304        }
305    }
306
307    public void translateParagraph( final Element base ) throws JDOMException {
308        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
309        syntax.paragraph( dto );
310    }
311
312    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
313        String n = base.getName().toLowerCase();
314        boolean bold = false;
315        boolean italic = false;
316        boolean monospace = false;
317        String cssSpecial = null;
318        final String cssClass = base.getAttributeValue( "class" );
319
320        // accomodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
321        final boolean ignoredCssClass = cssClass != null && cssClass.matches( "wikipage|createpage|external|interwiki|attachment|inline-code" );
322
323        Map< Object, Object > styleProps = null;
324
325        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
326        if( !n.equals( "a" ) ) {
327            styleProps = getStylePropertiesLowerCase( base );
328        }
329
330        if( "inline-code".equals( cssClass ) ) {
331            monospace = true;
332        }
333
334        if( styleProps != null ) {
335            final String weight = ( String ) styleProps.remove( "font-weight" );
336            final String style = ( String ) styleProps.remove( "font-style" );
337
338            if ( n.equals( "p" ) ) {
339                // change it, so we can print out the css styles for <p>
340                n = "div";
341            }
342
343            italic = "oblique".equals( style ) || "italic".equals( style );
344            bold = "bold".equals( weight ) || "bolder".equals( weight );
345            if ( !styleProps.isEmpty() ) {
346                cssSpecial = propsToStyleString( styleProps );
347            }
348        }
349
350        final ElementDecoratorData dto = new ElementDecoratorData();
351        dto.base = base;
352        dto.bold = bold;
353        dto.cssClass = cssClass;
354        dto.cssSpecial = cssSpecial;
355        dto.htmlBase = n;
356        dto.ignoredCssClass = ignoredCssClass;
357        dto.italic = italic;
358        dto.monospace = monospace;
359        return dto;
360    }
361
362    private class PreDeque extends ArrayDeque< String > {
363
364        @Override
365        public void addFirst( final String item ) {
366            super.addFirst( item );
367            outTrimmer.setWhitespaceTrimMode( isEmpty() );
368        }
369
370        @Override
371        public String removeFirst() {
372            final String pop = super.removeFirst();
373            outTrimmer.setWhitespaceTrimMode( isEmpty() );
374            return pop;
375        }
376
377    }
378
379    /**
380     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
381     */
382    public static class ElementDecoratorData {
383
384        /** don't allow instantiation outside enclosing class. */
385        private ElementDecoratorData() {}
386
387        public Element base;
388        public String htmlBase;
389        public String cssClass;
390        public String cssSpecial;
391        public boolean monospace;
392        public boolean bold;
393        public boolean italic;
394        public boolean ignoredCssClass;
395    }
396
397}