001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.htmltowiki; 020 021import org.apache.wiki.api.core.Context; 022import org.apache.wiki.api.core.Engine; 023import org.jdom2.Document; 024import org.jdom2.Element; 025import org.jdom2.JDOMException; 026import org.jdom2.input.SAXBuilder; 027import org.jdom2.input.sax.XMLReaderSAX2Factory; 028import org.jdom2.output.XMLOutputter; 029 030import java.io.IOException; 031import java.io.StringReader; 032 033/** 034 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and 035 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup. 036 * 037 */ 038public class HtmlStringToWikiTranslator { 039 040 private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser"; 041 private final Engine e; 042 043 /** 044 * Create a new translator. 045 */ 046 public HtmlStringToWikiTranslator( final Engine e ) { 047 this.e = e; 048 } 049 050 /** 051 * Translates text from HTML into WikiMarkup without a WikiContext (meaning 052 * some things perhaps cannot be translated). Uses the default configuration. 053 * 054 * @param html HTML text to translate 055 * @return WikiMarkup 056 * 057 * @throws JDOMException If parsing fails 058 * @throws IOException For other kinds of errors. 059 */ 060 public String translate( final String html ) throws JDOMException, IOException, ReflectiveOperationException { 061 return translate( html, new XHtmlToWikiConfig() ); 062 } 063 064 /** 065 * Translates text from HTML into WikiMarkup with a WikiContext. The translation 066 * accuracy is better. Uses the default configuration. 067 * 068 * @param html HTML text to translate 069 * @param wikiContext The WikiContext to use. 070 * @return WikiMarkup 071 * 072 * @throws JDOMException If parsing fails 073 * @throws IOException For other kinds of errors. 074 */ 075 public String translate( final String html, final Context wikiContext ) throws JDOMException, IOException, ReflectiveOperationException { 076 return translate( html, new XHtmlToWikiConfig( wikiContext ) ); 077 } 078 079 /** 080 * Translates text from HTML into WikiMarkup using a specified configuration. 081 * 082 * @param html HTML text to translate 083 * @param config The configuration to use. 084 * @return WikiMarkup 085 * 086 * @throws JDOMException If parsing fails 087 * @throws IOException For other kinds of errors. 088 */ 089 public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException, ReflectiveOperationException { 090 final Element element = htmlStringToElement( html ); 091 final XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( e, element, config ); 092 return xhtmlTranslator.getWikiString(); 093 } 094 095 /** 096 * Use NekoHtml to parse HTML like well-formed XHTML 097 * 098 * @param html HTML to parse. 099 * @return xhtml jdom root element (node "HTML") 100 * @throws JDOMException when errors occur in parsing 101 * @throws IOException when an I/O error prevents a document from being fully parsed 102 */ 103 private Element htmlStringToElement( final String html ) throws JDOMException, IOException { 104 final SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER ), null, null ); 105 final Document doc = builder.build( new StringReader( html ) ); 106 return doc.getRootElement(); 107 } 108 109 /** 110 * A static helper method to create HTML from an Element. 111 * 112 * @param element The element to get HTML from. 113 * @return HTML 114 */ 115 public static String element2String( final Element element ) { 116 final Document document = new Document( element ); 117 final XMLOutputter outputter = new XMLOutputter(); 118 return outputter.outputString( document ); 119 } 120 121}