001/* 
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.  
018 */
019
020package org.apache.wiki.util;
021
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Text;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
044
045/**
046 *  Utility class to parse XML files.
047 *  <p>
048 *  This uses JDOM2 as its backing implementation.
049 *  </p>
050 *  
051 * @since 2.10
052 */
053public final class XmlUtil  {
054
055    private static final String ALL_TEXT_NODES = "//text()";
056    private static final Logger LOG = LogManager.getLogger( XmlUtil.class );
057    private XmlUtil() {}
058    
059    /**
060     * Parses the given XML file and returns the requested nodes. If there's an error accessing or parsing the file, an
061     * empty list is returned.
062     * 
063     * @param xml file to parse; matches all resources from classpath, filters repeated items.
064     * @param requestedNodes requested nodes on the xml file
065     * @return the requested nodes of the XML file.
066     */
067    public static List< Element > parse( final String xml, final String requestedNodes ) {
068        if( StringUtils.isNotEmpty( xml ) && StringUtils.isNotEmpty( requestedNodes ) ) {
069            final Set< Element > readed = new HashSet<>();
070            final SAXBuilder builder = new SAXBuilder();
071            try {
072                final Enumeration< URL > resources = XmlUtil.class.getClassLoader().getResources( xml );
073                while( resources.hasMoreElements() ) {
074                    final URL resource = resources.nextElement();
075                    LOG.debug( "reading {}", resource.toString() );
076                    final Document doc = builder.build( resource );
077                    final XPathFactory xpfac = XPathFactory.instance();
078                    final XPathExpression<Element> xp = xpfac.compile( requestedNodes, Filters.element() );
079                    readed.addAll( xp.evaluate( doc ) ); // filter out repeated items
080                }
081                return new ArrayList<>( readed );
082            } catch( final IOException ioe ) {
083                LOG.error( "Couldn't load all {} resources", xml, ioe );
084            } catch( final JDOMException jdome ) {
085                LOG.error( "error parsing {} resources", xml, jdome );
086            }
087        }
088        return Collections.emptyList();
089    }
090    
091    /**
092     * Parses the given stream and returns the requested nodes. If there's an error accessing or parsing the stream, an
093     * empty list is returned.
094     * 
095     * @param xmlStream stream to parse.
096     * @param requestedNodes requestd nodes on the xml stream.
097     * @return the requested nodes of the XML stream.
098     */
099    public static List< Element > parse( final InputStream xmlStream, final String requestedNodes ) {
100        if( xmlStream != null && StringUtils.isNotEmpty( requestedNodes ) ) {
101            final SAXBuilder builder = new SAXBuilder();
102            try {
103                final Document doc = builder.build( xmlStream );
104                final XPathFactory xpfac = XPathFactory.instance();
105                final XPathExpression< Element > xp = xpfac.compile( requestedNodes, Filters.element() );
106                return xp.evaluate( doc );
107            } catch( final IOException ioe ) {
108                LOG.error( "Couldn't load all {} resources", xmlStream, ioe );
109            } catch( final JDOMException jdome ) {
110                LOG.error( "error parsing {} resources", xmlStream,  jdome );
111            }
112        }       
113        return Collections.emptyList();
114    }
115
116    /**
117     * Renders all the text() nodes from the DOM tree. This is very useful for cleaning away all the XHTML.
118     *
119     * @param doc Dom tree
120     * @return String containing only the text from the provided Dom tree.
121     */
122    public static String extractTextFromDocument( final Document doc ) {
123        if( doc == null ) {
124            return "";
125        }
126        final String sb;
127        final List< ? > nodes = XPathFactory.instance().compile( ALL_TEXT_NODES ).evaluate( doc );
128        sb = nodes.stream().filter(el -> el instanceof Text).map(el -> ((Text) el).getValue()).collect(Collectors.joining());
129
130        return sb;
131    }
132
133    public static Element getXPathElement( final Element base, final String expression ) {
134        final List< ? > nodes = XPathFactory.instance().compile( expression ).evaluate( base );
135        if( nodes == null || nodes.size() == 0 ) {
136            return null;
137        } else {
138            return ( Element )nodes.get( 0 );
139        }
140    }
141
142}