001    /* 
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.  
018     */
019    package org.apache.wiki.htmltowiki;
020    
021    import java.io.IOException;
022    import java.io.StringReader;
023    
024    import org.jdom2.Document;
025    import org.jdom2.Element;
026    import org.jdom2.JDOMException;
027    import org.jdom2.input.SAXBuilder;
028    import org.jdom2.output.XMLOutputter;
029    
030    import org.apache.wiki.WikiContext;
031    
032    /**
033     * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
034     * Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
035     * 
036     */
037    public class HtmlStringToWikiTranslator
038    {
039    
040        private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";
041    
042        /**
043         *  Create a new translator.
044         */
045        public HtmlStringToWikiTranslator()
046        {}
047    
048        /**
049         *  Translates text from HTML into WikiMarkup without a WikiContext (meaning
050         *  some things perhaps cannot be translated).  Uses the default configuration.
051         *  
052         *  @param html HTML text to translate
053         *  @return WikiMarkup
054         *  
055         *  @throws JDOMException If parsing fails
056         *  @throws IOException For other kinds of errors.
057         */
058        public String translate( String html ) throws JDOMException, IOException
059        {
060            return translate( html, new XHtmlToWikiConfig() );
061        }
062    
063        /**
064         *  Translates text from HTML into WikiMarkup with a WikiContext.  The translation
065         *  accuracy is better.  Uses the default configuration.
066         *  
067         *  @param html HTML text to translate
068         *  @param wikiContext The WikiContext to use.
069         *  @return WikiMarkup
070         *  
071         *  @throws JDOMException If parsing fails
072         *  @throws IOException For other kinds of errors.
073         */
074        public String translate( String html, WikiContext wikiContext ) throws JDOMException, IOException
075        {
076            return translate( html, new XHtmlToWikiConfig( wikiContext ) );
077        }
078    
079        /**
080         *  Translates text from HTML into WikiMarkup using a specified configuration.
081         *  
082         *  @param html HTML text to translate
083         *  @param config The configuration to use.
084         *  @return WikiMarkup
085         *  
086         *  @throws JDOMException If parsing fails
087         *  @throws IOException For other kinds of errors.
088         */
089    
090        public String translate( String html, XHtmlToWikiConfig config ) throws JDOMException, IOException
091        {
092            Element element = htmlStringToElement( html );
093            XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( element, config );
094            String wikiMarkup = xhtmlTranslator.getWikiString();
095            return wikiMarkup;
096        }
097    
098        /**
099         * Use NekoHtml to parse HTML like well formed XHTML
100         * 
101         * @param html
102         * @return xhtml jdom root element (node "HTML")
103         * @throws JDOMException
104         * @throws IOException
105         */
106        private Element htmlStringToElement( String html ) throws JDOMException, IOException
107        {
108            SAXBuilder builder = new SAXBuilder( CYBERNEKO_PARSER, true );
109            Document doc = builder.build( new StringReader( html ) );
110            Element element = doc.getRootElement();
111            return element;
112        }
113    
114        /**
115         *  A static helper method to create HTML from an Element.
116         *  
117         *  @param element The element to get HTML from.
118         *  @return HTML
119         */
120        public static String element2String( Element element )
121        {
122            Document document = new Document( element );
123            XMLOutputter outputter = new XMLOutputter();
124            return outputter.outputString( document );
125        }
126    
127    }