001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.htmltowiki; 020 021import org.apache.wiki.api.core.Context; 022import org.apache.wiki.api.core.Engine; 023import org.codelibs.nekohtml.parsers.SAXParser; 024import org.jdom2.Document; 025import org.jdom2.Element; 026import org.jdom2.JDOMException; 027import org.jdom2.input.SAXBuilder; 028import org.jdom2.input.sax.XMLReaderSAX2Factory; 029import org.jdom2.output.XMLOutputter; 030 031import java.io.IOException; 032import java.io.StringReader; 033 034/** 035 * Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and 036 * Xhtml2WikiTranslator for converting xhtml to Wiki Markup. 037 * 038 */ 039public class HtmlStringToWikiTranslator { 040 041 private static final String CYBERNEKO_PARSER = SAXParser.class.getName(); 042 private final Engine e; 043 044 /** 045 * Create a new translator. 046 */ 047 public HtmlStringToWikiTranslator( final Engine e ) { 048 this.e = e; 049 } 050 051 /** 052 * Translates text from HTML into WikiMarkup without a WikiContext (meaning 053 * some things perhaps cannot be translated). Uses the default configuration. 054 * 055 * @param html HTML text to translate 056 * @return WikiMarkup 057 * 058 * @throws JDOMException If parsing fails 059 * @throws IOException For other kinds of errors. 060 */ 061 public String translate( final String html ) throws JDOMException, IOException, ReflectiveOperationException { 062 return translate( html, new XHtmlToWikiConfig() ); 063 } 064 065 /** 066 * Translates text from HTML into WikiMarkup with a WikiContext. The translation 067 * accuracy is better. Uses the default configuration. 068 * 069 * @param html HTML text to translate 070 * @param wikiContext The WikiContext to use. 071 * @return WikiMarkup 072 * 073 * @throws JDOMException If parsing fails 074 * @throws IOException For other kinds of errors. 075 */ 076 public String translate( final String html, final Context wikiContext ) throws JDOMException, IOException, ReflectiveOperationException { 077 return translate( html, new XHtmlToWikiConfig( wikiContext ) ); 078 } 079 080 /** 081 * Translates text from HTML into WikiMarkup using a specified configuration. 082 * 083 * @param html HTML text to translate 084 * @param config The configuration to use. 085 * @return WikiMarkup 086 * 087 * @throws JDOMException If parsing fails 088 * @throws IOException For other kinds of errors. 089 */ 090 public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException, ReflectiveOperationException { 091 final Element element = htmlStringToElement( html ); 092 final XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( e, element, config ); 093 return xhtmlTranslator.getWikiString(); 094 } 095 096 /** 097 * Use NekoHtml to parse HTML like well-formed XHTML 098 * 099 * @param html HTML to parse. 100 * @return xhtml jdom root element (node "HTML") 101 * @throws JDOMException when errors occur in parsing 102 * @throws IOException when an I/O error prevents a document from being fully parsed 103 */ 104 private Element htmlStringToElement( final String html ) throws JDOMException, IOException { 105 final SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER ), null, null ); 106 //builder.setProperty( XMLConstants.ACCESS_EXTERNAL_DTD, "" ); 107 //builder.setProperty( XMLConstants.ACCESS_EXTERNAL_SCHEMA, "" ); 108 final Document doc = builder.build( new StringReader( html ) ); 109 return doc.getRootElement(); 110 } 111 112 /** 113 * A static helper method to create HTML from an Element. 114 * 115 * @param element The element to get HTML from. 116 * @return HTML 117 */ 118 public static String element2String( final Element element ) { 119 final Document document = new Document( element ); 120 final XMLOutputter outputter = new XMLOutputter(); 121 return outputter.outputString( document ); 122 } 123 124}