001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.htmltowiki;
020
021import org.apache.wiki.api.core.Context;
022import org.apache.wiki.api.core.Engine;
023import org.jdom2.Document;
024import org.jdom2.Element;
025import org.jdom2.JDOMException;
026import org.jdom2.input.SAXBuilder;
027import org.jdom2.input.sax.XMLReaderSAX2Factory;
028import org.jdom2.output.XMLOutputter;
029
030import java.io.IOException;
031import java.io.StringReader;
032
033/**
034 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
035 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
036 *
037 */
038public class HtmlStringToWikiTranslator {
039
040    private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";
041    private final Engine e;
042
043    /**
044     *  Create a new translator.
045     */
046    public HtmlStringToWikiTranslator( final Engine e ) {
047        this.e = e;
048    }
049
050    /**
051     *  Translates text from HTML into WikiMarkup without a WikiContext (meaning
052     *  some things perhaps cannot be translated).  Uses the default configuration.
053     *
054     *  @param html HTML text to translate
055     *  @return WikiMarkup
056     *
057     *  @throws JDOMException If parsing fails
058     *  @throws IOException For other kinds of errors.
059     */
060    public String translate( final String html ) throws JDOMException, IOException, ReflectiveOperationException {
061        return translate( html, new XHtmlToWikiConfig() );
062    }
063
064    /**
065     *  Translates text from HTML into WikiMarkup with a WikiContext.  The translation
066     *  accuracy is better.  Uses the default configuration.
067     *
068     *  @param html HTML text to translate
069     *  @param wikiContext The WikiContext to use.
070     *  @return WikiMarkup
071     *
072     *  @throws JDOMException If parsing fails
073     *  @throws IOException For other kinds of errors.
074     */
075    public String translate( final String html, final Context wikiContext ) throws JDOMException, IOException, ReflectiveOperationException {
076        return translate( html, new XHtmlToWikiConfig( wikiContext ) );
077    }
078
079    /**
080     *  Translates text from HTML into WikiMarkup using a specified configuration.
081     *
082     *  @param html HTML text to translate
083     *  @param config The configuration to use.
084     *  @return WikiMarkup
085     *
086     *  @throws JDOMException If parsing fails
087     *  @throws IOException For other kinds of errors.
088     */
089    public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException, ReflectiveOperationException {
090        final Element element = htmlStringToElement( html );
091        final XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( e, element, config );
092        return xhtmlTranslator.getWikiString();
093    }
094
095    /**
096     * Use NekoHtml to parse HTML like well-formed XHTML
097     *
098     * @param html HTML to parse.
099     * @return xhtml jdom root element (node "HTML")
100     * @throws JDOMException when errors occur in parsing
101     * @throws IOException when an I/O error prevents a document from being fully parsed
102     */
103    private Element htmlStringToElement( final String html ) throws JDOMException, IOException {
104        final SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER ), null, null );
105        final Document doc = builder.build( new StringReader( html ) );
106        return doc.getRootElement();
107    }
108
109    /**
110     *  A static helper method to create HTML from an Element.
111     *
112     *  @param element The element to get HTML from.
113     *  @return HTML
114     */
115    public static String element2String( final Element element ) {
116        final Document document = new Document( element );
117        final XMLOutputter outputter = new XMLOutputter();
118        return outputter.outputString( document );
119    }
120
121}