/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.htmltowiki;

import java.io.IOException;
import java.io.StringReader;

import org.apache.wiki.WikiContext;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.jdom2.input.sax.XMLReaderSAX2Factory;
import org.jdom2.output.XMLOutputter;

/**
 *  Turns an HTML string into wiki markup in two steps: the HTML is first parsed
 *  into a JDOM tree through the NekoHTML SAX parser, and that tree is then handed
 *  to {@link XHtmlElementToWikiTranslator}, which produces the wiki markup.
 */
public class HtmlStringToWikiTranslator {

    /** Fully qualified class name of the SAX parser handed to JDOM's reader factory. */
    private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";

    /**
     *  Creates a new translator. The instance holds no state of its own.
     */
    public HtmlStringToWikiTranslator() {
    }

    /**
     *  Translates HTML into wiki markup using a default {@link XHtmlToWikiConfig},
     *  i.e. without a WikiContext. The original documentation notes that some things
     *  perhaps cannot be translated in this mode.
     *
     *  @param html the HTML text to translate
     *  @return the resulting wiki markup
     *  @throws JDOMException if parsing the HTML fails
     *  @throws IOException for other kinds of errors while reading the input
     */
    public String translate( final String html ) throws JDOMException, IOException {
        final XHtmlToWikiConfig defaultConfig = new XHtmlToWikiConfig();
        return translate( html, defaultConfig );
    }

    /**
     *  Translates HTML into wiki markup using an {@link XHtmlToWikiConfig} built
     *  from the given WikiContext. Per the original documentation, translation
     *  accuracy is better when a context is available.
     *
     *  @param html the HTML text to translate
     *  @param wikiContext the WikiContext used to build the configuration
     *  @return the resulting wiki markup
     *  @throws JDOMException if parsing the HTML fails
     *  @throws IOException for other kinds of errors while reading the input
     */
    public String translate( final String html, final WikiContext wikiContext ) throws JDOMException, IOException {
        final XHtmlToWikiConfig contextConfig = new XHtmlToWikiConfig( wikiContext );
        return translate( html, contextConfig );
    }

    /**
     *  Translates HTML into wiki markup using the supplied configuration. This is
     *  the overload the other two {@code translate} methods delegate to.
     *
     *  @param html the HTML text to translate
     *  @param config the configuration to use
     *  @return the resulting wiki markup
     *  @throws JDOMException if parsing the HTML fails
     *  @throws IOException for other kinds of errors while reading the input
     */
    public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException {
        // Parse first, then translate: the parsed root element is the translator's input.
        return new XHtmlElementToWikiTranslator( htmlStringToElement( html ), config ).getWikiString();
    }

    /**
     *  Parses the given HTML with the NekoHTML SAX parser so that it can be handled
     *  like well-formed XHTML, and returns the root of the resulting JDOM tree.
     *
     *  @param html the HTML text to parse
     *  @return the root element of the parsed document (the original notes call this the "HTML" node)
     *  @throws JDOMException if the parser reports an error
     *  @throws IOException if reading the input fails
     */
    private Element htmlStringToElement( final String html ) throws JDOMException, IOException {
        // Arguments are passed exactly as before: 'true' plus the NekoHTML driver class name.
        // NOTE(review): in JDOM2 the boolean is presumably the "validate" flag - confirm against the JDOM docs.
        final XMLReaderSAX2Factory nekoReaderFactory = new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER );
        final SAXBuilder saxBuilder = new SAXBuilder( nekoReaderFactory, null, null );
        return saxBuilder.build( new StringReader( html ) ).getRootElement();
    }

    /**
     *  A static helper that serializes an Element to a string by wrapping it in a
     *  new {@link Document} and writing that document with a default {@link XMLOutputter}.
     *  <p>
     *  NOTE(review): the element is passed directly to the Document constructor, so it
     *  presumably becomes the root of that new document as a side effect - confirm
     *  against the JDOM docs before calling this on an element that is already attached.
     *
     *  @param element the element to get HTML from
     *  @return the serialized document as a string
     */
    public static String element2String( final Element element ) {
        final Document wrapper = new Document( element );
        return new XMLOutputter().outputString( wrapper );
    }

}