/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */

package org.apache.wiki.util;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 *  Utility class to parse XML files.
 *  <p>
 *  This uses JDOM2 as its backing implementation.
 *  </p>
 *
 *  @since 2.10
 */
public final class XmlUtil {

    /** XPath expression selecting every text node of a document. */
    private static final String ALL_TEXT_NODES = "//text()";
    private static final Logger LOG = LogManager.getLogger( XmlUtil.class );

    /** Static utility class, not meant to be instantiated. */
    private XmlUtil() {}

    /**
     * Parses the given XML file and returns the requested nodes. If there's an error accessing or parsing the file, an
     * empty list is returned.
     * <p>
     * Every classpath resource matching {@code xml} is read; a resource URL that is enumerated more than once is only
     * read the first time. The returned elements keep the order in which the resources were enumerated and, within
     * each resource, the order in which the XPath evaluation returned them.
     * </p>
     *
     * @param xml file to parse; matches all resources from classpath, filters repeated items.
     * @param requestedNodes requested nodes on the xml file, as an XPath expression selecting elements.
     * @return the requested nodes of the XML file, or an empty list if any argument is empty or any resource could not
     *         be read or parsed.
     */
    public static List< Element > parse( final String xml, final String requestedNodes ) {
        if( StringUtils.isEmpty( xml ) || StringUtils.isEmpty( requestedNodes ) ) {
            return Collections.emptyList();
        }

        // Elements parsed from different documents are never equal to each other, so collecting them in a hash set
        // cannot filter anything and only randomizes their order. Repeated items are filtered by resource URL instead.
        final List< Element > found = new ArrayList<>();
        final Set< String > visited = new HashSet<>();
        final SAXBuilder builder = new SAXBuilder();
        try {
            final Enumeration< URL > resources = XmlUtil.class.getClassLoader().getResources( xml );
            while( resources.hasMoreElements() ) {
                final URL resource = resources.nextElement();
                // compare the textual form: URL#equals may resolve host names
                if( !visited.add( resource.toExternalForm() ) ) {
                    continue;
                }
                LOG.debug( "reading {}", resource );
                final Document doc = builder.build( resource );
                final XPathExpression< Element > xp = XPathFactory.instance().compile( requestedNodes, Filters.element() );
                found.addAll( xp.evaluate( doc ) );
            }
            return found;
        } catch( final IOException ioe ) {
            LOG.error( "Couldn't load all {} resources", xml, ioe );
        } catch( final JDOMException jdome ) {
            LOG.error( "error parsing {} resources", xml, jdome );
        }
        return Collections.emptyList();
    }

    /**
     * Parses the given stream and returns the requested nodes. If there's an error accessing or parsing the stream, an
     * empty list is returned.
     * <p>
     * The stream is not closed by this method.
     * </p>
     *
     * @param xmlStream stream to parse.
     * @param requestedNodes requested nodes on the xml stream, as an XPath expression selecting elements.
     * @return the requested nodes of the XML stream, or an empty list if the stream is {@code null}, the expression is
     *         empty or the stream could not be read or parsed.
     */
    public static List< Element > parse( final InputStream xmlStream, final String requestedNodes ) {
        if( xmlStream == null || StringUtils.isEmpty( requestedNodes ) ) {
            return Collections.emptyList();
        }

        // NOTE(review): the builder is used with its default settings; if streams can come from untrusted sources,
        // consider disabling DTDs / external entities on it - confirm against callers.
        final SAXBuilder builder = new SAXBuilder();
        try {
            final Document doc = builder.build( xmlStream );
            final XPathExpression< Element > xp = XPathFactory.instance().compile( requestedNodes, Filters.element() );
            return xp.evaluate( doc );
        } catch( final IOException ioe ) {
            LOG.error( "Couldn't load all {} resources", xmlStream, ioe );
        } catch( final JDOMException jdome ) {
            LOG.error( "error parsing {} resources", xmlStream, jdome );
        }
        return Collections.emptyList();
    }

    /**
     * Renders all the text() nodes from the DOM tree. This is very useful for cleaning away all the XHTML.
     *
     * @param doc Dom tree
     * @return String containing only the text from the provided Dom tree; an empty String if {@code doc} is {@code null}.
     */
    public static String extractTextFromDocument( final Document doc ) {
        if( doc == null ) {
            return "";
        }

        final StringBuilder sb = new StringBuilder();
        final List< Text > textNodes = XPathFactory.instance().compile( ALL_TEXT_NODES, Filters.text() ).evaluate( doc );
        for( final Text textNode : textNodes ) {
            sb.append( textNode.getValue() );
        }
        return sb.toString();
    }

    /**
     * Evaluates an XPath expression against the given element and returns the first element it selects.
     * <p>
     * Only elements are considered: results of the expression that are not elements (text nodes, attributes, ...)
     * are skipped instead of causing a {@link ClassCastException}.
     * </p>
     *
     * @param base element used as the context of the evaluation.
     * @param expression XPath expression to evaluate.
     * @return the first element selected by the expression, or {@code null} if {@code base} is {@code null}, the
     *         expression is empty or no element is selected.
     */
    public static Element getXPathElement( final Element base, final String expression ) {
        if( base == null || StringUtils.isEmpty( expression ) ) {
            return null;
        }

        final List< Element > nodes = XPathFactory.instance().compile( expression, Filters.element() ).evaluate( base );
        return nodes.isEmpty() ? null : nodes.get( 0 );
    }

}