001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019 020package org.apache.wiki.util; 021 022import org.apache.commons.lang3.StringUtils; 023import org.apache.logging.log4j.LogManager; 024import org.apache.logging.log4j.Logger; 025import org.jdom2.Document; 026import org.jdom2.Element; 027import org.jdom2.JDOMException; 028import org.jdom2.Text; 029import org.jdom2.filter.Filters; 030import org.jdom2.input.SAXBuilder; 031import org.jdom2.xpath.XPathExpression; 032import org.jdom2.xpath.XPathFactory; 033 034import java.io.IOException; 035import java.io.InputStream; 036import java.net.URL; 037import java.util.ArrayList; 038import java.util.Collections; 039import java.util.Enumeration; 040import java.util.HashSet; 041import java.util.List; 042import java.util.Set; 043import java.util.stream.Collectors; 044 045/** 046 * Utility class to parse XML files. 047 * <p> 048 * This uses JDOM2 as its backing implementation. 049 * </p> 050 * 051 * @since 2.10 052 */ 053public final class XmlUtil { 054 055 private static final String ALL_TEXT_NODES = "//text()"; 056 private static final Logger LOG = LogManager.getLogger( XmlUtil.class ); 057 private XmlUtil() {} 058 059 /** 060 * Parses the given XML file and returns the requested nodes. If there's an error accessing or parsing the file, an 061 * empty list is returned. 062 * 063 * @param xml file to parse; matches all resources from classpath, filters repeated items. 064 * @param requestedNodes requested nodes on the xml file 065 * @return the requested nodes of the XML file. 066 */ 067 public static List< Element > parse( final String xml, final String requestedNodes ) { 068 if( StringUtils.isNotEmpty( xml ) && StringUtils.isNotEmpty( requestedNodes ) ) { 069 final Set< Element > readed = new HashSet<>(); 070 final SAXBuilder builder = new SAXBuilder(); 071 try { 072 final Enumeration< URL > resources = XmlUtil.class.getClassLoader().getResources( xml ); 073 while( resources.hasMoreElements() ) { 074 final URL resource = resources.nextElement(); 075 LOG.debug( "reading {}", resource.toString() ); 076 final Document doc = builder.build( resource ); 077 final XPathFactory xpfac = XPathFactory.instance(); 078 final XPathExpression<Element> xp = xpfac.compile( requestedNodes, Filters.element() ); 079 readed.addAll( xp.evaluate( doc ) ); // filter out repeated items 080 } 081 return new ArrayList<>( readed ); 082 } catch( final IOException ioe ) { 083 LOG.error( "Couldn't load all {} resources", xml, ioe ); 084 } catch( final JDOMException jdome ) { 085 LOG.error( "error parsing {} resources", xml, jdome ); 086 } 087 } 088 return Collections.emptyList(); 089 } 090 091 /** 092 * Parses the given stream and returns the requested nodes. If there's an error accessing or parsing the stream, an 093 * empty list is returned. 094 * 095 * @param xmlStream stream to parse. 096 * @param requestedNodes requestd nodes on the xml stream. 097 * @return the requested nodes of the XML stream. 098 */ 099 public static List< Element > parse( final InputStream xmlStream, final String requestedNodes ) { 100 if( xmlStream != null && StringUtils.isNotEmpty( requestedNodes ) ) { 101 final SAXBuilder builder = new SAXBuilder(); 102 try { 103 final Document doc = builder.build( xmlStream ); 104 final XPathFactory xpfac = XPathFactory.instance(); 105 final XPathExpression< Element > xp = xpfac.compile( requestedNodes, Filters.element() ); 106 return xp.evaluate( doc ); 107 } catch( final IOException ioe ) { 108 LOG.error( "Couldn't load all {} resources", xmlStream, ioe ); 109 } catch( final JDOMException jdome ) { 110 LOG.error( "error parsing {} resources", xmlStream, jdome ); 111 } 112 } 113 return Collections.emptyList(); 114 } 115 116 /** 117 * Renders all the text() nodes from the DOM tree. This is very useful for cleaning away all the XHTML. 118 * 119 * @param doc Dom tree 120 * @return String containing only the text from the provided Dom tree. 121 */ 122 public static String extractTextFromDocument( final Document doc ) { 123 if( doc == null ) { 124 return ""; 125 } 126 final String sb; 127 final List< ? > nodes = XPathFactory.instance().compile( ALL_TEXT_NODES ).evaluate( doc ); 128 sb = nodes.stream().filter(el -> el instanceof Text).map(el -> ((Text) el).getValue()).collect(Collectors.joining()); 129 130 return sb; 131 } 132 133 public static Element getXPathElement( final Element base, final String expression ) { 134 final List< ? > nodes = XPathFactory.instance().compile( expression ).evaluate( base ); 135 if( nodes == null || nodes.size() == 0 ) { 136 return null; 137 } else { 138 return ( Element )nodes.get( 0 ); 139 } 140 } 141 142}