001/* 
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.  
018 */
019
020package org.apache.wiki.util;
021
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
043
044/**
045 *  Utility class to parse XML files.
046 *  <p>
047 *  This uses JDOM2 as its backing implementation.
048 *  </p>
049 *  
050 * @since 2.10
051 */
052public final class XmlUtil  {
053
054    private static final String ALL_TEXT_NODES = "//text()";
055    private static final Logger LOG = LogManager.getLogger( XmlUtil.class );
056    private XmlUtil() {}
057    
058    /**
059     * Parses the given XML file and returns the requested nodes. If there's an error accessing or parsing the file, an
060     * empty list is returned.
061     * 
062     * @param xml file to parse; matches all resources from classpath, filters repeated items.
063     * @param requestedNodes requested nodes on the xml file
064     * @return the requested nodes of the XML file.
065     */
066    public static List< Element > parse( final String xml, final String requestedNodes ) {
067        if( StringUtils.isNotEmpty( xml ) && StringUtils.isNotEmpty( requestedNodes ) ) {
068            final Set< Element > readed = new HashSet<>();
069            final SAXBuilder builder = new SAXBuilder();
070            try {
071                final Enumeration< URL > resources = XmlUtil.class.getClassLoader().getResources( xml );
072                while( resources.hasMoreElements() ) {
073                    final URL resource = resources.nextElement();
074                    LOG.debug( "reading {}", resource.toString() );
075                    final Document doc = builder.build( resource );
076                    final XPathFactory xpfac = XPathFactory.instance();
077                    final XPathExpression<Element> xp = xpfac.compile( requestedNodes, Filters.element() );
078                    readed.addAll( xp.evaluate( doc ) ); // filter out repeated items
079                }
080                return new ArrayList<>( readed );
081            } catch( final IOException ioe ) {
082                LOG.error( "Couldn't load all {} resources", xml, ioe );
083            } catch( final JDOMException jdome ) {
084                LOG.error( "error parsing {} resources", xml, jdome );
085            }
086        }
087        return Collections.emptyList();
088    }
089    
090    /**
091     * Parses the given stream and returns the requested nodes. If there's an error accessing or parsing the stream, an
092     * empty list is returned.
093     * 
094     * @param xmlStream stream to parse.
095     * @param requestedNodes requestd nodes on the xml stream.
096     * @return the requested nodes of the XML stream.
097     */
098    public static List< Element > parse( final InputStream xmlStream, final String requestedNodes ) {
099        if( xmlStream != null && StringUtils.isNotEmpty( requestedNodes ) ) {
100            final SAXBuilder builder = new SAXBuilder();
101            try {
102                final Document doc = builder.build( xmlStream );
103                final XPathFactory xpfac = XPathFactory.instance();
104                final XPathExpression< Element > xp = xpfac.compile( requestedNodes, Filters.element() );
105                return xp.evaluate( doc );
106            } catch( final IOException ioe ) {
107                LOG.error( "Couldn't load all {} resources", xmlStream, ioe );
108            } catch( final JDOMException jdome ) {
109                LOG.error( "error parsing {} resources", xmlStream,  jdome );
110            }
111        }       
112        return Collections.emptyList();
113    }
114
115    /**
116     * Renders all the text() nodes from the DOM tree. This is very useful for cleaning away all the XHTML.
117     *
118     * @param doc Dom tree
119     * @return String containing only the text from the provided Dom tree.
120     */
121    public static String extractTextFromDocument( final Document doc ) {
122        if( doc == null ) {
123            return "";
124        }
125        final StringBuilder sb = new StringBuilder();
126        final List< ? > nodes = XPathFactory.instance().compile( ALL_TEXT_NODES ).evaluate( doc );
127        for( final Object el : nodes ) {
128            if( el instanceof Text ) {
129                sb.append( ( ( Text )el ).getValue() );
130            }
131        }
132
133        return sb.toString();
134    }
135
136    public static Element getXPathElement( final Element base, final String expression ) {
137        final List< ? > nodes = XPathFactory.instance().compile( expression ).evaluate( base );
138        if( nodes == null || nodes.size() == 0 ) {
139            return null;
140        } else {
141            return ( Element )nodes.get( 0 );
142        }
143    }
144
145}