001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
021import org.apache.wiki.api.core.Context;
022import org.jdom2.Document;
023import org.jdom2.Element;
024import org.jdom2.JDOMException;
025import org.jdom2.input.SAXBuilder;
026import org.jdom2.input.sax.XMLReaderSAX2Factory;
027import org.jdom2.output.XMLOutputter;
028
029import java.io.IOException;
030import java.io.StringReader;
031
032/**
033 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
034 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
035 *
036 */
037public class HtmlStringToWikiTranslator
038{
039
040    private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";
041
042    /**
043     *  Create a new translator.
044     */
045    public HtmlStringToWikiTranslator()
046    {}
047
048    /**
049     *  Translates text from HTML into WikiMarkup without a WikiContext (meaning
050     *  some things perhaps cannot be translated).  Uses the default configuration.
051     *
052     *  @param html HTML text to translate
053     *  @return WikiMarkup
054     *
055     *  @throws JDOMException If parsing fails
056     *  @throws IOException For other kinds of errors.
057     */
058    public String translate( final String html ) throws JDOMException, IOException
059    {
060        return translate( html, new XHtmlToWikiConfig() );
061    }
062
063    /**
064     *  Translates text from HTML into WikiMarkup with a WikiContext.  The translation
065     *  accuracy is better.  Uses the default configuration.
066     *
067     *  @param html HTML text to translate
068     *  @param wikiContext The WikiContext to use.
069     *  @return WikiMarkup
070     *
071     *  @throws JDOMException If parsing fails
072     *  @throws IOException For other kinds of errors.
073     */
074    public String translate( final String html, final Context wikiContext ) throws JDOMException, IOException
075    {
076        return translate( html, new XHtmlToWikiConfig( wikiContext ) );
077    }
078
079    /**
080     *  Translates text from HTML into WikiMarkup using a specified configuration.
081     *
082     *  @param html HTML text to translate
083     *  @param config The configuration to use.
084     *  @return WikiMarkup
085     *
086     *  @throws JDOMException If parsing fails
087     *  @throws IOException For other kinds of errors.
088     */
089
090    public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException
091    {
092        final Element element = htmlStringToElement( html );
093        final XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( element, config );
094        return xhtmlTranslator.getWikiString();
095    }
096
097    /**
098     * Use NekoHtml to parse HTML like well-formed XHTML
099     *
100     * @param html HTML to parse.
101     * @return xhtml jdom root element (node "HTML")
102     * @throws JDOMException when errors occur in parsing
103     * @throws IOException when an I/O error prevents a document from being fully parsed
104     */
105    private Element htmlStringToElement( final String html ) throws JDOMException, IOException
106    {
107        final SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER), null, null );
108        final Document doc = builder.build( new StringReader( html ) );
109        return doc.getRootElement();
110    }
111
112    /**
113     *  A static helper method to create HTML from an Element.
114     *
115     *  @param element The element to get HTML from.
116     *  @return HTML
117     */
118    public static String element2String( final Element element )
119    {
120        final Document document = new Document( element );
121        final XMLOutputter outputter = new XMLOutputter();
122        return outputter.outputString( document );
123    }
124
125}