001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
021import org.apache.wiki.api.core.Context;
022import org.jdom2.Document;
023import org.jdom2.Element;
024import org.jdom2.JDOMException;
025import org.jdom2.input.SAXBuilder;
026import org.jdom2.input.sax.XMLReaderSAX2Factory;
027import org.jdom2.output.XMLOutputter;
028
029import java.io.IOException;
030import java.io.StringReader;
031
032/**
033 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
034 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
035 *
036 */
037public class HtmlStringToWikiTranslator
038{
039
040    private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";
041
042    /**
043     *  Create a new translator.
044     */
045    public HtmlStringToWikiTranslator()
046    {}
047
048    /**
049     *  Translates text from HTML into WikiMarkup without a WikiContext (meaning
050     *  some things perhaps cannot be translated).  Uses the default configuration.
051     *
052     *  @param html HTML text to translate
053     *  @return WikiMarkup
054     *
055     *  @throws JDOMException If parsing fails
056     *  @throws IOException For other kinds of errors.
057     */
058    public String translate( final String html ) throws JDOMException, IOException
059    {
060        return translate( html, new XHtmlToWikiConfig() );
061    }
062
063    /**
064     *  Translates text from HTML into WikiMarkup with a WikiContext.  The translation
065     *  accuracy is better.  Uses the default configuration.
066     *
067     *  @param html HTML text to translate
068     *  @param wikiContext The WikiContext to use.
069     *  @return WikiMarkup
070     *
071     *  @throws JDOMException If parsing fails
072     *  @throws IOException For other kinds of errors.
073     */
074    public String translate( final String html, final Context wikiContext ) throws JDOMException, IOException
075    {
076        return translate( html, new XHtmlToWikiConfig( wikiContext ) );
077    }
078
079    /**
080     *  Translates text from HTML into WikiMarkup using a specified configuration.
081     *
082     *  @param html HTML text to translate
083     *  @param config The configuration to use.
084     *  @return WikiMarkup
085     *
086     *  @throws JDOMException If parsing fails
087     *  @throws IOException For other kinds of errors.
088     */
089
090    public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException
091    {
092        final Element element = htmlStringToElement( html );
093        final XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( element, config );
094        final String wikiMarkup = xhtmlTranslator.getWikiString();
095        return wikiMarkup;
096    }
097
098    /**
099     * Use NekoHtml to parse HTML like well formed XHTML
100     *
101     * @param html
102     * @return xhtml jdom root element (node "HTML")
103     * @throws JDOMException
104     * @throws IOException
105     */
106    private Element htmlStringToElement( final String html ) throws JDOMException, IOException
107    {
108        final SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER), null, null );
109        final Document doc = builder.build( new StringReader( html ) );
110        final Element element = doc.getRootElement();
111        return element;
112    }
113
114    /**
115     *  A static helper method to create HTML from an Element.
116     *
117     *  @param element The element to get HTML from.
118     *  @return HTML
119     */
120    public static String element2String( final Element element )
121    {
122        final Document document = new Document( element );
123        final XMLOutputter outputter = new XMLOutputter();
124        return outputter.outputString( document );
125    }
126
127}