/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */

package org.apache.wiki.util;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
 *  Utility class to parse XML files.
 *  <p>
 *  This uses JDOM2 as its backing implementation.
 *  </p>
 *  <p>
 *  NOTE(review): the {@link SAXBuilder} instances created here use JDOM2's default configuration; no DTD / external
 *  entity restrictions are applied in this class. Confirm that callers only hand over trusted XML.
 *  </p>
 *
 * @since 2.10
 */
public final class XmlUtil {

    private static final String ALL_TEXT_NODES = "//text()";
    private static final Logger LOG = LogManager.getLogger( XmlUtil.class );

    /** Static utility class, not meant to be instantiated. */
    private XmlUtil() {}

    /**
     * Parses the given XML file and returns the requested nodes. If there's an error accessing or parsing the file, an
     * empty list is returned.
     *
     * @param xml file to parse; matches all resources from classpath, filters repeated items.
     * @param requestedNodes requested nodes on the xml file, expressed as an XPath expression selecting elements.
     * @return the requested nodes of the XML file, in the order in which they were found; an empty list if any of the
     *         parameters is empty or if the resources couldn't be read or parsed.
     */
    public static List< Element > parse( final String xml, final String requestedNodes ) {
        if( StringUtils.isEmpty( xml ) || StringUtils.isEmpty( requestedNodes ) ) {
            return Collections.emptyList();
        }

        // Insertion-ordered set: drops repeated element instances while keeping the results in discovery order,
        // so the returned list does not depend on identity hash codes.
        final Set< Element > found = new LinkedHashSet<>();
        final SAXBuilder builder = new SAXBuilder();
        try {
            final Enumeration< URL > resources = XmlUtil.class.getClassLoader().getResources( xml );
            while( resources.hasMoreElements() ) {
                final URL resource = resources.nextElement();
                LOG.debug( "reading {}", resource );
                final Document doc = builder.build( resource );
                final XPathExpression< Element > xp = XPathFactory.instance().compile( requestedNodes, Filters.element() );
                found.addAll( xp.evaluate( doc ) );
            }
            return new ArrayList<>( found );
        } catch( final IOException ioe ) {
            LOG.error( "Couldn't load all {} resources", xml, ioe );
        } catch( final JDOMException jdome ) {
            LOG.error( "error parsing {} resources", xml, jdome );
        }
        return Collections.emptyList();
    }

    /**
     * Parses the given stream and returns the requested nodes. If there's an error accessing or parsing the stream, an
     * empty list is returned.
     *
     * @param xmlStream stream to parse.
     * @param requestedNodes requested nodes on the xml stream, expressed as an XPath expression selecting elements.
     * @return the requested nodes of the XML stream; an empty list if the stream is {@code null}, the expression is
     *         empty, or the stream couldn't be read or parsed.
     */
    public static List< Element > parse( final InputStream xmlStream, final String requestedNodes ) {
        if( xmlStream == null || StringUtils.isEmpty( requestedNodes ) ) {
            return Collections.emptyList();
        }

        final SAXBuilder builder = new SAXBuilder();
        try {
            final Document doc = builder.build( xmlStream );
            final XPathExpression< Element > xp = XPathFactory.instance().compile( requestedNodes, Filters.element() );
            return xp.evaluate( doc );
        } catch( final IOException ioe ) {
            // the stream's toString() carries no useful information, so it is left out of the message
            LOG.error( "Couldn't read the provided XML stream", ioe );
        } catch( final JDOMException jdome ) {
            LOG.error( "error parsing the provided XML stream", jdome );
        }
        return Collections.emptyList();
    }

    /**
     * Renders all the text() nodes from the DOM tree. This is very useful for cleaning away all of the XHTML.
     *
     * @param doc Dom tree
     * @return String containing only the text from the provided Dom tree; an empty String if {@code doc} is {@code null}.
     */
    public static String extractTextFromDocument( final Document doc ) {
        if( doc == null ) {
            return "";
        }
        final StringBuilder sb = new StringBuilder();
        // Filters.text() keeps every Text node (CDATA sections included, as CDATA is a Text subclass)
        final List< Text > nodes = XPathFactory.instance().compile( ALL_TEXT_NODES, Filters.text() ).evaluate( doc );
        for( final Text text : nodes ) {
            sb.append( text.getValue() );
        }

        return sb.toString();
    }

    /**
     * Evaluates the given XPath expression against the given element and returns the first matching element.
     * Matches that are not elements (f.ex., text or attribute nodes) are ignored.
     *
     * @param base element used as the context of the XPath evaluation.
     * @param expression XPath expression to evaluate.
     * @return the first element matched by the expression, or {@code null} if there is no matching element, or if
     *         {@code base} is {@code null} or {@code expression} is empty.
     */
    public static Element getXPathElement( final Element base, final String expression ) {
        if( base == null || StringUtils.isEmpty( expression ) ) {
            return null;
        }
        // typed expression: avoids the unchecked cast that failed when the first match wasn't an Element
        return XPathFactory.instance().compile( expression, Filters.element() ).evaluateFirst( base );
    }

}