001 /*
002 Licensed to the Apache Software Foundation (ASF) under one
003 or more contributor license agreements. See the NOTICE file
004 distributed with this work for additional information
005 regarding copyright ownership. The ASF licenses this file
006 to you under the Apache License, Version 2.0 (the
007 "License"); you may not use this file except in compliance
008 with the License. You may obtain a copy of the License at
009
010 http://www.apache.org/licenses/LICENSE-2.0
011
012 Unless required by applicable law or agreed to in writing,
013 software distributed under the License is distributed on an
014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 KIND, either express or implied. See the License for the
016 specific language governing permissions and limitations
017 under the License.
018 */
019 package org.apache.wiki.htmltowiki;
020
021 import java.io.IOException;
022 import java.io.StringReader;
023
024 import org.jdom2.Document;
025 import org.jdom2.Element;
026 import org.jdom2.JDOMException;
027 import org.jdom2.input.SAXBuilder;
028 import org.jdom2.output.XMLOutputter;
029
030 import org.apache.wiki.WikiContext;
031
032 /**
033 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
034 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
035 *
036 */
037 public class HtmlStringToWikiTranslator
038 {
039
040 private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";
041
042 /**
043 * Create a new translator.
044 */
045 public HtmlStringToWikiTranslator()
046 {}
047
048 /**
049 * Translates text from HTML into WikiMarkup without a WikiContext (meaning
050 * some things perhaps cannot be translated). Uses the default configuration.
051 *
052 * @param html HTML text to translate
053 * @return WikiMarkup
054 *
055 * @throws JDOMException If parsing fails
056 * @throws IOException For other kinds of errors.
057 */
058 public String translate( String html ) throws JDOMException, IOException
059 {
060 return translate( html, new XHtmlToWikiConfig() );
061 }
062
063 /**
064 * Translates text from HTML into WikiMarkup with a WikiContext. The translation
065 * accuracy is better. Uses the default configuration.
066 *
067 * @param html HTML text to translate
068 * @param wikiContext The WikiContext to use.
069 * @return WikiMarkup
070 *
071 * @throws JDOMException If parsing fails
072 * @throws IOException For other kinds of errors.
073 */
074 public String translate( String html, WikiContext wikiContext ) throws JDOMException, IOException
075 {
076 return translate( html, new XHtmlToWikiConfig( wikiContext ) );
077 }
078
079 /**
080 * Translates text from HTML into WikiMarkup using a specified configuration.
081 *
082 * @param html HTML text to translate
083 * @param config The configuration to use.
084 * @return WikiMarkup
085 *
086 * @throws JDOMException If parsing fails
087 * @throws IOException For other kinds of errors.
088 */
089
090 public String translate( String html, XHtmlToWikiConfig config ) throws JDOMException, IOException
091 {
092 Element element = htmlStringToElement( html );
093 XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( element, config );
094 String wikiMarkup = xhtmlTranslator.getWikiString();
095 return wikiMarkup;
096 }
097
098 /**
099 * Use NekoHtml to parse HTML like well formed XHTML
100 *
101 * @param html
102 * @return xhtml jdom root element (node "HTML")
103 * @throws JDOMException
104 * @throws IOException
105 */
106 private Element htmlStringToElement( String html ) throws JDOMException, IOException
107 {
108 SAXBuilder builder = new SAXBuilder( CYBERNEKO_PARSER, true );
109 Document doc = builder.build( new StringReader( html ) );
110 Element element = doc.getRootElement();
111 return element;
112 }
113
114 /**
115 * A static helper method to create HTML from an Element.
116 *
117 * @param element The element to get HTML from.
118 * @return HTML
119 */
120 public static String element2String( Element element )
121 {
122 Document document = new Document( element );
123 XMLOutputter outputter = new XMLOutputter();
124 return outputter.outputString( document );
125 }
126
127 }