001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
021import java.io.IOException;
022import java.io.StringReader;
023
024import org.apache.wiki.WikiContext;
025import org.jdom2.Document;
026import org.jdom2.Element;
027import org.jdom2.JDOMException;
028import org.jdom2.input.SAXBuilder;
029import org.jdom2.input.sax.XMLReaderSAX2Factory;
030import org.jdom2.output.XMLOutputter;
031
032/**
033 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
034 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
035 *
036 */
037public class HtmlStringToWikiTranslator
038{
039
040    private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";
041
042    /**
043     *  Create a new translator.
044     */
045    public HtmlStringToWikiTranslator()
046    {}
047
048    /**
049     *  Translates text from HTML into WikiMarkup without a WikiContext (meaning
050     *  some things perhaps cannot be translated).  Uses the default configuration.
051     *
052     *  @param html HTML text to translate
053     *  @return WikiMarkup
054     *
055     *  @throws JDOMException If parsing fails
056     *  @throws IOException For other kinds of errors.
057     */
058    public String translate( String html ) throws JDOMException, IOException
059    {
060        return translate( html, new XHtmlToWikiConfig() );
061    }
062
063    /**
064     *  Translates text from HTML into WikiMarkup with a WikiContext.  The translation
065     *  accuracy is better.  Uses the default configuration.
066     *
067     *  @param html HTML text to translate
068     *  @param wikiContext The WikiContext to use.
069     *  @return WikiMarkup
070     *
071     *  @throws JDOMException If parsing fails
072     *  @throws IOException For other kinds of errors.
073     */
074    public String translate( String html, WikiContext wikiContext ) throws JDOMException, IOException
075    {
076        return translate( html, new XHtmlToWikiConfig( wikiContext ) );
077    }
078
079    /**
080     *  Translates text from HTML into WikiMarkup using a specified configuration.
081     *
082     *  @param html HTML text to translate
083     *  @param config The configuration to use.
084     *  @return WikiMarkup
085     *
086     *  @throws JDOMException If parsing fails
087     *  @throws IOException For other kinds of errors.
088     */
089
090    public String translate( String html, XHtmlToWikiConfig config ) throws JDOMException, IOException
091    {
092        Element element = htmlStringToElement( html );
093        XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( element, config );
094        String wikiMarkup = xhtmlTranslator.getWikiString();
095        return wikiMarkup;
096    }
097
098    /**
099     * Use NekoHtml to parse HTML like well formed XHTML
100     *
101     * @param html
102     * @return xhtml jdom root element (node "HTML")
103     * @throws JDOMException
104     * @throws IOException
105     */
106    private Element htmlStringToElement( String html ) throws JDOMException, IOException
107    {
108        SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER), null, null );
109        Document doc = builder.build( new StringReader( html ) );
110        Element element = doc.getRootElement();
111        return element;
112    }
113
114    /**
115     *  A static helper method to create HTML from an Element.
116     *
117     *  @param element The element to get HTML from.
118     *  @return HTML
119     */
120    public static String element2String( Element element )
121    {
122        Document document = new Document( element );
123        XMLOutputter outputter = new XMLOutputter();
124        return outputter.outputString( document );
125    }
126
127}