001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.htmltowiki.syntax.MarkupHelper;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.XmlUtil;
import org.jdom2.Content;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.Verifier;
import org.jdom2.xpath.XPathFactory;

import java.io.PrintWriter;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
042
043
044/**
045 * Converting XHtml to Wiki Markup.  This is the class which orchestrates all the heavy loading.
046 */
047public class XHtmlElementToWikiTranslator {
048
049    private static final Logger LOG = LogManager.getLogger( XHtmlElementToWikiTranslator.class );
050    private static final String DEFAULT_SYNTAX_DECORATOR = "org.apache.wiki.htmltowiki.syntax.jspwiki.JSPWikiSyntaxDecorator";
051
052    private final Engine e;
053    private final XHtmlToWikiConfig config;
054    private final WhitespaceTrimWriter outTrimmer = new WhitespaceTrimWriter();
055    private final SyntaxDecorator syntax;
056
057    /**
058     *  Create a new translator using the default config.
059     *
060     *  @param base The base element from which to start translating.
061     *  @throws JDOMException If the DOM tree is faulty.
062     */
063    public XHtmlElementToWikiTranslator( final Engine e, final Element base ) throws JDOMException, ReflectiveOperationException {
064        this( e, base, new XHtmlToWikiConfig() );
065    }
066
067    /**
068     *  Create a new translator using the specified config.
069     *
070     *  @param base The base element from which to start translating.
071     *  @param config The config to use.
072     *  @throws JDOMException If the DOM tree is faulty.
073     */
074    public XHtmlElementToWikiTranslator( final Engine e, final Element base, final XHtmlToWikiConfig config ) throws JDOMException, ReflectiveOperationException {
075        this.e = e;
076        this.config = config;
077        syntax = getSyntaxDecorator();
078        final PrintWriter out = new PrintWriter( outTrimmer );
079        final Deque< String > liStack = new ArrayDeque<>();
080        final Deque< String > preStack = new PreDeque();
081
082        syntax.init( out, liStack, preStack, outTrimmer, config, this );
083        translate( base );
084    }
085
086    SyntaxDecorator getSyntaxDecorator() throws ReflectiveOperationException {
087        String sdClass = e.getWikiProperties().getProperty( "jspwiki.syntax.decorator", DEFAULT_SYNTAX_DECORATOR );
088        if( !ClassUtil.assignable( sdClass, SyntaxDecorator.class.getName() ) ) {
089            LOG.warn( "{} does not subclass {} reverting to default syntax decorator.", sdClass, SyntaxDecorator.class.getName() );
090            sdClass = DEFAULT_SYNTAX_DECORATOR;
091        }
092        LOG.debug( "Using {} as markup parser.", sdClass );
093        return ClassUtil.buildInstance( sdClass );
094    }
095
096    /**
097     * Outputs parsed wikitext.
098     *
099     * @return parsed wikitext.
100     */
101    public String getWikiString() {
102        return outTrimmer.toString();
103    }
104
105    public void translate( final Content element ) throws JDOMException {
106        if( element instanceof Text ) {
107            translateText( ( Text ) element );
108        } else if( element instanceof Element ) {
109            final Element base = ( Element )element;
110            final String n = base.getName().toLowerCase();
111            final String reason = Verifier.checkElementName(n);
112
113            if (reason != null)  return; /* invalid element name */
114
115            if( "script".equals( n ) ) return; /* nono, not a good idea*/
116
117            if( "imageplugin".equals( base.getAttributeValue( "class" ) ) ) {
118                translateImage( base );
119            } else if( "wikiform".equals( base.getAttributeValue( "class" ) ) ) {
120                // only print the children if the div's class="wikiform", but not the div itself.
121                translateChildren( base );
122            } else {
123                translateParagraph( base );
124            }
125        }
126    }
127
128    public void translateText( final Text element ) {
129        syntax.text( element );
130    }
131
132    public void translateImage( final Element base ) {
133        Element child = XmlUtil.getXPathElement( base, "TBODY/TR/TD/*" );
134        if( child == null ) {
135            child = base;
136        }
137        final Element img;
138        final String href;
139        if( child.getName().equals( "A" ) ) {
140            img = child.getChild( "IMG" );
141            href = child.getAttributeValue( "href" );
142        } else {
143            img = child;
144            href = null;
145        }
146        if( img == null ) {
147            return;
148        }
149        final String src = config.trimLink( img.getAttributeValue( "src" ) );
150        if( src == null ) {
151            return;
152        }
153
154        final Map< String, Object > imageAttrs = new LinkedHashMap<>();
155        putIfNotEmpty( imageAttrs, "align", base.getAttributeValue( "align" ) );
156        putIfNotEmpty( imageAttrs, "height", img.getAttributeValue( "height" ) );
157        putIfNotEmpty( imageAttrs, "width", img.getAttributeValue( "width" ) );
158        putIfNotEmpty( imageAttrs, "alt", img.getAttributeValue( "alt" ) );
159        putIfNotEmpty( imageAttrs, "caption", emptyToNull( ( Element )XPathFactory.instance().compile(  "CAPTION" ).evaluateFirst( base ) ) );
160        putIfNotEmpty( imageAttrs, "link", href );
161        putIfNotEmpty( imageAttrs, "border", img.getAttributeValue( "border" ) );
162        putIfNotEmpty( imageAttrs, "style", base.getAttributeValue( "style" ) );
163        syntax.image( src, imageAttrs );
164    }
165
166    private void putIfNotEmpty( final Map< String, Object > map, final String key, final Object value ) {
167        if( value != null ) {
168            map.put( key, value );
169        }
170    }
171
172    private String emptyToNull( final Element e ) {
173        if( e == null ) {
174            return null;
175        }
176        final String s = e.getText();
177        return s == null ? null : ( s.replaceAll( "\\s", "" ).isEmpty() ? null : s );
178    }
179
180    private Map< Object, Object > getStylePropertiesLowerCase( final Element base ) {
181        final String n = base.getName().toLowerCase();
182
183        // "font-weight: bold; font-style: italic;"
184        String style = base.getAttributeValue( "style" );
185        if( style == null ) {
186            style = "";
187        }
188
189        if( n.equals( "p" ) || n.equals( "div" ) ) {
190            final String align = base.getAttributeValue( "align" );
191            if( align != null ) {
192                // only add the value of the align attribute if the text-align style didn't already exist.
193                if( !style.contains( "text-align" ) ) {
194                    style += ";text-align:" + align + ";";
195                }
196            }
197        }
198
199        if( n.equals( "font" ) ) {
200            final String color = base.getAttributeValue( "color" );
201            final String face = base.getAttributeValue( "face" );
202            final String size = base.getAttributeValue( "size" );
203            if( color != null ) {
204                style = style + "color:" + color + ";";
205            }
206            if( face != null ) {
207                style = style + "font-family:" + face + ";";
208            }
209            if( size != null ) {
210                switch ( size ) {
211                    case "1": style += "font-size:xx-small;"; break;
212                    case "2": style += "font-size:x-small;"; break;
213                    case "3": style += "font-size:small;"; break;
214                    case "4": style += "font-size:medium;"; break;
215                    case "5": style += "font-size:large;"; break;
216                    case "6": style += "font-size:x-large;"; break;
217                    case "7": style += "font-size:xx-large;"; break;
218                }
219            }
220        }
221
222        if( style.equals( "" ) ) {
223            return null;
224        }
225
226        final Map< Object, Object > m = new LinkedHashMap<>();
227        Arrays.stream( style.toLowerCase().split( ";" ) )
228              .filter( StringUtils::isNotBlank )
229              .forEach( prop -> m.put( prop.split( ":" )[ 0 ].trim(), prop.split( ":" )[ 1 ].trim() ) );
230        return m;
231    }
232
233    private String propsToStyleString( final Map< Object, Object >  styleProps ) {
234        return styleProps.entrySet().stream().map(entry -> " " + entry.getKey() + ": " + entry.getValue() + ";").collect(Collectors.joining());
235    }
236
237    public void translateChildren( final Element base ) throws JDOMException {
238        for( final Content c : base.getContent() ) {
239            if( c instanceof Element ) {
240                final Element e = ( Element )c;
241                final String n = e.getName().toLowerCase();
242                switch( n ) {
243                    case "h1": syntax.h1( e ); break;
244                    case "h2": syntax.h2( e ); break;
245                    case "h3": syntax.h3( e ); break;
246                    case "h4": syntax.h4( e ); break;
247                    case "p": syntax.p( e ); break;
248                    case "br": syntax.br( base, e ); break;
249                    case "hr": syntax.hr( e ); break;
250                    case "table": syntax.table( e ); break;
251                    case "tbody": syntax.tbody( e ); break;
252                    case "tr": syntax.tr( e ); break;
253                    case "td": syntax.td( e ); break;
254                    case "thead": syntax.thead( e ); break;
255                    case "th": syntax.th( e ); break;
256                    case "a": translateA( e ); break;
257                    case "b":
258                    case "strong": syntax.strong( e ); break;
259                    case "i":
260                    case "em":
261                    case "address": syntax.em( e ); break;
262                    case "u": syntax.underline( e ); break;
263                    case "strike": syntax.strike( e ); break;
264                    case "sub": syntax.sub( e ); break;
265                    case "sup": syntax.sup( e ); break;
266                    case "dl": syntax.dl( e ); break;
267                    case "dt": syntax.dt( e ); break;
268                    case "dd": syntax.dd( e ); break;
269                    case "ul": syntax.ul( e ); break;
270                    case "ol": syntax.ol( e ); break;
271                    case "li": syntax.li( base, e ); break;
272                    case "pre": syntax.pre( e ); break;
273                    case "code":
274                    case "tt": syntax.code( e ); break;
275                    case "img": syntax.img( e ); break;
276                    case "form": syntax.form( e ); break;
277                    case "input": syntax.input( e ); break;
278                    case "textarea": syntax.textarea( e ); break;
279                    case "select": syntax.select( e ); break;
280                    case "option": syntax.option( base, e ); break;
281                    default: translate( e ); break;
282                }
283            } else {
284                translate( c );
285            }
286        }
287    }
288
289    void translateA( final Element e ) throws JDOMException {
290        if( config.isNotIgnorableWikiMarkupLink( e ) ) {
291            if( e.getChild( "IMG" ) != null ) {
292                translateImage( e );
293            } else {
294                final String ref = config.trimLink( e.getAttributeValue( "href" ) );
295                if( ref == null ) {
296                    if( MarkupHelper.isUndefinedPageLink( e ) ) {
297                        syntax.aUndefined( e );
298                    } else {
299                        translate( e );
300                    }
301                } else if( MarkupHelper.isFootnoteLink( ref ) ) {
302                    final String href = ref.replaceFirst( "#ref-.+-(\\d+)", "$1" ); // convert "#ref-PageName-1" to just "1"
303                    final String textValue = e.getValue().substring( 1, ( e.getValue().length() - 1 ) ); // remove the brackets around "[1]"
304                    syntax.aFootnote( textValue, href );
305                } else {
306                    syntax.a( e, ref );
307                }
308            }
309        }
310    }
311
312    public void translateParagraph( final Element base ) throws JDOMException {
313        final ElementDecoratorData dto = buildElementDecoratorDataFrom( base );
314        syntax.paragraph( dto );
315    }
316
317    ElementDecoratorData buildElementDecoratorDataFrom( final Element base ) {
318        String n = base.getName().toLowerCase();
319        boolean bold = false;
320        boolean italic = false;
321        boolean monospace = false;
322        String cssSpecial = null;
323        final String cssClass = base.getAttributeValue( "class" );
324
325        // accomodate a FCKeditor bug with Firefox: when a link is removed, it becomes <span class="wikipage">text</span>.
326        final boolean ignoredCssClass = cssClass != null && cssClass.matches( "wikipage|createpage|external|interwiki|attachment|inline-code" );
327
328        Map< Object, Object > styleProps = null;
329
330        // Only get the styles if it's not a link element. Styles for link elements are handled as an AugmentedWikiLink instead.
331        if( !n.equals( "a" ) ) {
332            styleProps = getStylePropertiesLowerCase( base );
333        }
334
335        if( "inline-code".equals( cssClass ) ) {
336            monospace = true;
337        }
338
339        if( styleProps != null ) {
340            final String weight = ( String ) styleProps.remove( "font-weight" );
341            final String style = ( String ) styleProps.remove( "font-style" );
342
343            if ( n.equals( "p" ) ) {
344                // change it, so we can print out the css styles for <p>
345                n = "div";
346            }
347
348            italic = "oblique".equals( style ) || "italic".equals( style );
349            bold = "bold".equals( weight ) || "bolder".equals( weight );
350            if ( !styleProps.isEmpty() ) {
351                cssSpecial = propsToStyleString( styleProps );
352            }
353        }
354
355        final ElementDecoratorData dto = new ElementDecoratorData();
356        dto.base = base;
357        dto.bold = bold;
358        dto.cssClass = cssClass;
359        dto.cssSpecial = cssSpecial;
360        dto.htmlBase = n;
361        dto.ignoredCssClass = ignoredCssClass;
362        dto.italic = italic;
363        dto.monospace = monospace;
364        return dto;
365    }
366
367    private class PreDeque extends ArrayDeque< String > {
368
369        private static final long serialVersionUID = 2401529529970187649L;
370
371        @Override
372        public void addFirst( final String item ) {
373            super.addFirst( item );
374            outTrimmer.setWhitespaceTrimMode( isEmpty() );
375        }
376
377        @Override
378        public String removeFirst() {
379            final String pop = super.removeFirst();
380            outTrimmer.setWhitespaceTrimMode( isEmpty() );
381            return pop;
382        }
383
384    }
385
386    /**
387     * Simple data placeholder class to move decoration state between plain text syntax translation related classes.
388     */
389    public static class ElementDecoratorData {
390
391        /** don't allow instantiation outside enclosing class. */
392        private ElementDecoratorData() {}
393
394        public Element base;
395        public String htmlBase;
396        public String cssClass;
397        public String cssSpecial;
398        public boolean monospace;
399        public boolean bold;
400        public boolean italic;
401        public boolean ignoredCssClass;
402    }
403
404}