001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
021import org.apache.wiki.api.core.Context;
022import org.apache.wiki.api.core.Engine;
023import org.codelibs.nekohtml.parsers.SAXParser;
024import org.jdom2.Document;
025import org.jdom2.Element;
026import org.jdom2.JDOMException;
027import org.jdom2.input.SAXBuilder;
028import org.jdom2.input.sax.XMLReaderSAX2Factory;
029import org.jdom2.output.XMLOutputter;
030
031import java.io.IOException;
032import java.io.StringReader;
033
034/**
035 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
036 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
037 *
038 */
039public class HtmlStringToWikiTranslator {
040
041    private static final String CYBERNEKO_PARSER = SAXParser.class.getName();
042    private final Engine e;
043
044    /**
045     *  Create a new translator.
046     */
047    public HtmlStringToWikiTranslator( final Engine e ) {
048        this.e = e;
049    }
050
051    /**
052     *  Translates text from HTML into WikiMarkup without a WikiContext (meaning
053     *  some things perhaps cannot be translated).  Uses the default configuration.
054     *
055     *  @param html HTML text to translate
056     *  @return WikiMarkup
057     *
058     *  @throws JDOMException If parsing fails
059     *  @throws IOException For other kinds of errors.
060     */
061    public String translate( final String html ) throws JDOMException, IOException, ReflectiveOperationException {
062        return translate( html, new XHtmlToWikiConfig() );
063    }
064
065    /**
066     *  Translates text from HTML into WikiMarkup with a WikiContext.  The translation
067     *  accuracy is better.  Uses the default configuration.
068     *
069     *  @param html HTML text to translate
070     *  @param wikiContext The WikiContext to use.
071     *  @return WikiMarkup
072     *
073     *  @throws JDOMException If parsing fails
074     *  @throws IOException For other kinds of errors.
075     */
076    public String translate( final String html, final Context wikiContext ) throws JDOMException, IOException, ReflectiveOperationException {
077        return translate( html, new XHtmlToWikiConfig( wikiContext ) );
078    }
079
080    /**
081     *  Translates text from HTML into WikiMarkup using a specified configuration.
082     *
083     *  @param html HTML text to translate
084     *  @param config The configuration to use.
085     *  @return WikiMarkup
086     *
087     *  @throws JDOMException If parsing fails
088     *  @throws IOException For other kinds of errors.
089     */
090    public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException, ReflectiveOperationException {
091        final Element element = htmlStringToElement( html );
092        final XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( e, element, config );
093        return xhtmlTranslator.getWikiString();
094    }
095
096    /**
097     * Use NekoHtml to parse HTML like well-formed XHTML
098     *
099     * @param html HTML to parse.
100     * @return xhtml jdom root element (node "HTML")
101     * @throws JDOMException when errors occur in parsing
102     * @throws IOException when an I/O error prevents a document from being fully parsed
103     */
104    private Element htmlStringToElement( final String html ) throws JDOMException, IOException {
105        final SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER ), null, null );
106        //builder.setProperty( XMLConstants.ACCESS_EXTERNAL_DTD, "" );
107        //builder.setProperty( XMLConstants.ACCESS_EXTERNAL_SCHEMA, "" );
108        final Document doc = builder.build( new StringReader( html ) );
109        return doc.getRootElement();
110    }
111
112    /**
113     *  A static helper method to create HTML from an Element.
114     *
115     *  @param element The element to get HTML from.
116     *  @return HTML
117     */
118    public static String element2String( final Element element ) {
119        final Document document = new Document( element );
120        final XMLOutputter outputter = new XMLOutputter();
121        return outputter.outputString( document );
122    }
123
124}