001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.htmltowiki.syntax.MarkupHelper;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.Verifier;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
041
042
043/**
044 * Converting XHtml to Wiki Markup.  This is the class which orchestrates all the heavy loading.
045 */
046public class XHtmlElementToWikiTranslator {
047
048    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
049    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";
050
051    private final Engine e;
052    private final XHtmlToWikiConfig config;
053    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
054    private final SyntaxDecorator syntax;
055
056    /**
057     *  Create a new translator using the default config.
058     *
059     *  @param base The base element from which to start translating.
060     *  @throws JDOMException If the DOM tree is faulty.
061     */
062    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
063        this( e, base, new XHtmlToWikiConfig() );
064    }
065
066    /**
067     *  Create a new translator using the specified config.
068     *
069     *  @param base The base element from which to start translating.
070     *  @param config The config to use.
071     *  @throws JDOMException If the DOM tree is faulty.
072     */
073    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
074        this.e = e;
075        this.config = config;
076        syntax = getSyntaxDecorator();
077        final PrintWriter out = new PrintWriter( outTrimmer );
078        final Deque< String > liStack = new ArrayDeque<>();
079        final Deque< String > preStack = new PreDeque();
080
081        syntax.init( out, liStack, preStack, outTrimmer, config, this );
082        translate( base );
083    }
084
085    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
086        String sdClass = e.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
087        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
088            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
089            sdClass = DEFAULT_SYNTAX_DECORATOR;
090        }
091        LOG.debug( "Using {} as markup parser.", sdClass );
092        return ClassUtil.buildInstance( sdClass );
093    }
094
095    /**
096     * Outputs parsed wikitext.
097     *
098     * @return parsed wikitext.
099     */
100    public String getWikiString() {
101        return outTrimmer.toString();
102    }
103
104    public void translate( final Content element ) throws JDOMException {
105        if( element instanceof Text ) {
106            translateText( ( Text ) element );
107        } else if( element instanceof Element ) {
108            final Element base = ( Element )element;
109            final String n = base.getName().toLowerCase();
110            final String reason = Verifier.checkElementName(n);
111
112            if (reason != null)  return; /* invalid element name */
113
114            if( "script".equals( n ) ) return; /* nono, not a good idea*/
115
116            if( "imageplugin".equals( base.getAttributeValue( "class" ) ) ) {
117                translateImage( base );
118            } else if( "wikiform".equals( base.getAttributeValue( "class" ) ) ) {
119                // only print the children if the div's class="wikiform", but not the div itself.
120                translateChildren( base );
121            } else {
122                translateParagraph( base );
123            }
124        }
125    }
126
127    public void translateText( final Text element ) {
128        syntax.text( element );
129    }
130
131    public void translateImage( final Element base ) {
132        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
133        if( child == null ) {
134            child = base;
135        }
136        final Element img;
137        final String href;
138        if( child.getName().equals( "A" ) ) {
139            img = child.getChild( "IMG" );
140            href = child.getAttributeValue( "href" );
141        } else {
142            img = child;
143            href = null;
144        }
145        if( img == null ) {
146            return;
147        }
148        final String src = config.trimLink( img.getAttributeValue( "src" ) );
149        if( src == null ) {
150            return;
151        }
152
153        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
154        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
155        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
156        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
157        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
158        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile(  "CAPTION" ).evaluateFirst( base ) ) );
159        putIfNotEmpty( imageAttrs, "link", href );
160        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
161        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
162        syntax.image( src, imageAttrs );
163    }
164
165    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
166        if( value != null ) {
167            map.put( key, value );
168        }
169    }
170
171    private String emptyToNull( final Element e ) {
172        if( e == null ) {
173            return null;
174        }
175        final String s = e.getText();
176        return s == null ? null : ( s.replaceAll( "\\s", "" ).isEmpty() ? null : s );
177    }
178
179    private Map< Object, Object > getStylePropertiesLowerCase( final Element base ) {
180        final String n = base.getName().toLowerCase();
181
182        // "font-weight: bold; font-style: italic;"
183        String style = base.getAttributeValue( "style" );
184        if( style == null ) {
185            style = "";
186        }
187
188        if( n.equals( "p" ) || n.equals( "div" ) ) {
189            final String align = base.getAttributeValue( "align" );
190            if( align != null ) {
191                // only add the value of the align attribute if the text-align style didn't already exist.
192                if( !style.contains( "text-align" ) ) {
193                    style += ";text-align:" + align + ";";
194                }
195            }
196        }
197
198        if( n.equals( "font" ) ) {
199            final String color = base.getAttributeValue( "color" );
200            final String face = base.getAttributeValue( "face" );
201            final String size = base.getAttributeValue( "size" );
202            if( color != null ) {
203                style = style + "color:" + color + ";";
204            }
205            if( face != null ) {
206                style = style + "font-family:" + face + ";";
207            }
208            if( size != null ) {
209                switch ( size ) {
210                    case "1": style += "font-size:xx-small;"; break;
211                    case "2": style += "font-size:x-small;"; break;
212                    case "3": style += "font-size:small;"; break;
213                    case "4": style += "font-size:medium;"; break;
214                    case "5": style += "font-size:large;"; break;
215                    case "6": style += "font-size:x-large;"; break;
216                    case "7": style += "font-size:xx-large;"; break;
217                }
218            }
219        }
220
221        if( style.equals( "" ) ) {
222            return null;
223        }
224
225        final Map< Object, Object > m = new LinkedHashMap<>();
226        Arrays.stream( style.toLowerCase().split( ";" ) )
227              .filter( StringUtils::isNotEmpty )
228              .forEach( prop -> m.put( prop.split( ":" )[ 0 ].trim(), prop.split( ":" )[ 1 ].trim() ) );
229        return m;
230    }
231
232    private String propsToStyleString( final Map< Object, Object >  styleProps ) {
233        final StringBuilder style = new StringBuilder();
234        for( final Map.Entry< Object, Object > entry : styleProps.entrySet() ) {
235            style.append( " " ).append( entry.getKey() ).append( ": " ).append( entry.getValue() ).append( ";" );
236        }
237        return style.toString();
238    }
239
240    public void translateChildren( final Element base ) throws JDOMException {
241        for( final Content c : base.getContent() ) {
242            if( c instanceof Element ) {
243                final Element e = ( Element )c;
244                final String n = e.getName().toLowerCase();
245                switch( n ) {
246                    case "h1": syntax.h1( e ); break;
247                    case "h2": syntax.h2( e ); break;
248                    case "h3": syntax.h3( e ); break;
249                    case "h4": syntax.h4( e ); break;
250                    case "p": syntax.p( e ); break;
251                    case "br": syntax.br( base, e ); break;
252                    case "hr": syntax.hr( e ); break;
253                    case "table": syntax.table( e ); break;
254                    case "tbody": syntax.tbody( e ); break;
255                    case "tr": syntax.tr( e ); break;
256                    case "td": syntax.td( e ); break;
257                    case "thead": syntax.thead( e ); break;
258                    case "th": syntax.th( e ); break;
259                    case "a": translateA( e ); break;
260                    case "b":
261                    case "strong": syntax.strong( e ); break;
262                    case "i":
263                    case "em":
264                    case "address": syntax.em( e ); break;
265                    case "u": syntax.underline( e ); break;
266                    case "strike": syntax.strike( e ); break;
267                    case "sub": syntax.sub( e ); break;
268                    case "sup": syntax.sup( e ); break;
269                    case "dl": syntax.dl( e ); break;
270                    case "dt": syntax.dt( e ); break;
271                    case "dd": syntax.dd( e ); break;
272                    case "ul": syntax.ul( e ); break;
273                    case "ol": syntax.ol( e ); break;
274                    case "li": syntax.li( base, e ); break;
275                    case "pre": syntax.pre( e ); break;
276                    case "code":
277                    case "tt": syntax.code( e ); break;
278                    case "img": syntax.img( e ); break;
279                    case "form": syntax.form( e ); break;
280                    case "input": syntax.input( e ); break;
281                    case "textarea": syntax.textarea( e ); break;
282                    case "select": syntax.select( e ); break;
283                    case "option": syntax.option( base, e ); break;
284                    default: translate( e ); break;
285                }
286            } else {
287                translate( c );
288            }
289        }
290    }
291
292    void translateA( final Element e ) throws JDOMException {
293        if( config.isNotIgnorableWikiMarkupLink( e ) ) {
294            if( e.getChild( "IMG" ) != null ) {
295                translateImage( e );
296            } else {
297                final String ref = config.trimLink( e.getAttributeValue( "href" ) );
298                if( ref == null ) {
299                    if( MarkupHelper.isUndefinedPageLink( e ) ) {
300                        syntax.aUndefined( e );
301                    } else {
302                        translate( e );
303                    }
304                } else if( MarkupHelper.isFootnoteLink( ref ) ) {
305                    final String href = ref.replaceFirst( "#ref-.+-(\\d+)", "$1" ); // convert "#ref-PageName-1" to just "1"
306                    final String textValue = e.getValue().substring( 1, ( e.getValue().length() - 1 ) ); // remove the brackets around "[1]"
307                    syntax.aFootnote( textValue, href );
308                } else {
309                    syntax.a( e, ref );
310                }
311            }
312        }
313    }
314
315    public void translateParagraph( final Element base ) throws JDOMException {
316        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
317        syntax.paragraph( dto );
318    }
319
320    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
321        String n = base.getName().toLowerCase();
322        boolean bold = false;
323        boolean italic = false;
324        boolean monospace = false;
325        String cssSpecial = null;
326        final String cssClass = base.getAttributeValue( "class" );
327
328        // accomodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
329        final boolean ignoredCssClass = cssClass != null && cssClass.matches( "wikipage|createpage|external|interwiki|attachment|inline-code" );
330
331        Map< Object, Object > styleProps = null;
332
333        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
334        if( !n.equals( "a" ) ) {
335            styleProps = getStylePropertiesLowerCase( base );
336        }
337
338        if( "inline-code".equals( cssClass ) ) {
339            monospace = true;
340        }
341
342        if( styleProps != null ) {
343            final String weight = ( String ) styleProps.remove( "font-weight" );
344            final String style = ( String ) styleProps.remove( "font-style" );
345
346            if ( n.equals( "p" ) ) {
347                // change it, so we can print out the css styles for <p>
348                n = "div";
349            }
350
351            italic = "oblique".equals( style ) || "italic".equals( style );
352            bold = "bold".equals( weight ) || "bolder".equals( weight );
353            if ( !styleProps.isEmpty() ) {
354                cssSpecial = propsToStyleString( styleProps );
355            }
356        }
357
358        final ElementDecoratorData dto = new ElementDecoratorData();
359        dto.base = base;
360        dto.bold = bold;
361        dto.cssClass = cssClass;
362        dto.cssSpecial = cssSpecial;
363        dto.htmlBase = n;
364        dto.ignoredCssClass = ignoredCssClass;
365        dto.italic = italic;
366        dto.monospace = monospace;
367        return dto;
368    }
369
370    private class PreDeque extends ArrayDeque< String > {
371
372        @Override
373        public void addFirst( final String item ) {
374            super.addFirst( item );
375            outTrimmer.setWhitespaceTrimMode( isEmpty() );
376        }
377
378        @Override
379        public String removeFirst() {
380            final String pop = super.removeFirst();
381            outTrimmer.setWhitespaceTrimMode( isEmpty() );
382            return pop;
383        }
384
385    }
386
387    /**
388     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
389     */
390    public static class ElementDecoratorData {
391
392        /** don't allow instantiation outside enclosing class. */
393        private ElementDecoratorData() {}
394
395        public Element base;
396        public String htmlBase;
397        public String cssClass;
398        public String cssSpecial;
399        public boolean monospace;
400        public boolean bold;
401        public boolean italic;
402        public boolean ignoredCssClass;
403    }
404
405}