001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.  
018*/
019
020package org.apache.wiki.diff;
021
022import org.apache.logging.log4j.LogManager;
023import org.apache.logging.log4j.Logger;
024import org.apache.wiki.api.core.Context;
025import org.apache.wiki.api.core.Engine;
026import org.apache.wiki.api.exceptions.NoRequiredPropertyException;
027import org.apache.wiki.util.TextUtil;
028import org.suigeneris.jrcs.diff.Diff;
029import org.suigeneris.jrcs.diff.DifferentiationFailedException;
030import org.suigeneris.jrcs.diff.Revision;
031import org.suigeneris.jrcs.diff.RevisionVisitor;
032import org.suigeneris.jrcs.diff.delta.AddDelta;
033import org.suigeneris.jrcs.diff.delta.ChangeDelta;
034import org.suigeneris.jrcs.diff.delta.Chunk;
035import org.suigeneris.jrcs.diff.delta.DeleteDelta;
036import org.suigeneris.jrcs.diff.delta.Delta;
037import org.suigeneris.jrcs.diff.myers.MyersDiff;
038
039import java.io.IOException;
040import java.util.ArrayList;
041import java.util.Arrays;
042import java.util.List;
043import java.util.Properties;
044import java.util.StringTokenizer;
045import java.util.stream.Collectors;
046
047
048/**
049 * A seriously better diff provider, which highlights changes word-by-word using CSS.
050 *
051 * Suggested by John Volkar.
052 */
053public class ContextualDiffProvider implements DiffProvider {
054
055    private static final Logger LOG = LogManager.getLogger( ContextualDiffProvider.class );
056
057    /**
058     *  A jspwiki.properties value to define how many characters are shown around the change context.
059     *  The current value is <tt>{@value}</tt>.
060     */
061    public static final String PROP_UNCHANGED_CONTEXT_LIMIT = "jspwiki.contextualDiffProvider.unchangedContextLimit";
062
063    //TODO all of these publics can become jspwiki.properties entries...
064    //TODO span title= can be used to get hover info...
065
066    public boolean m_emitChangeNextPreviousHyperlinks = true;
067
068    //Don't use spans here the deletion and insertions are nested in this...
069    public static String CHANGE_START_HTML = ""; //This could be a image '>' for a start marker
070    public static String CHANGE_END_HTML = ""; //and an image for an end '<' marker
071    public static String DIFF_START = "<div class=\"diff-wikitext\">";
072    public static String DIFF_END = "</div>";
073
074    // Unfortunately we need to do dumb HTML here for RSS feeds.
075
076    public static String INSERTION_START_HTML = "<font color=\"#8000FF\"><span class=\"diff-insertion\">";
077    public static String INSERTION_END_HTML = "</span></font>";
078    public static String DELETION_START_HTML = "<strike><font color=\"red\"><span class=\"diff-deletion\">";
079    public static String DELETION_END_HTML = "</span></font></strike>";
080    private static final String ANCHOR_PRE_INDEX = "<a name=\"change-";
081    private static final String ANCHOR_POST_INDEX = "\" />";
082    private static final String BACK_PRE_INDEX = "<a class=\"diff-nextprev\" title=\"Go to previous change\" href=\"#change-";
083    private static final String BACK_POST_INDEX = "\">&lt;&lt;</a>";
084    private static final String FORWARD_PRE_INDEX = "<a class=\"diff-nextprev\" title=\"Go to next change\" href=\"#change-";
085    private static final String FORWARD_POST_INDEX = "\">&gt;&gt;</a>";
086    public static String ELIDED_HEAD_INDICATOR_HTML = "<br/><br/><b>...</b>";
087    public static String ELIDED_TAIL_INDICATOR_HTML = "<b>...</b><br/><br/>";
088    public static String LINE_BREAK_HTML = "<br />";
089    public static String ALTERNATING_SPACE_HTML = "&nbsp;";
090
091    // This one, I will make property file based...
092    private static final int LIMIT_MAX_VALUE = (Integer.MAX_VALUE /2) - 1;
093    private int m_unchangedContextLimit = LIMIT_MAX_VALUE;
094
095
096    /**
097     *  Constructs this provider.
098     */
099    public ContextualDiffProvider()
100    {}
101
102    /**
103     * @see org.apache.wiki.api.providers.WikiProvider#getProviderInfo()
104     * 
105     * {@inheritDoc}
106     */
107    @Override
108    public String getProviderInfo()
109    {
110        return "ContextualDiffProvider";
111    }
112
113    /**
114     * @see org.apache.wiki.api.providers.WikiProvider#initialize(org.apache.wiki.api.core.Engine, java.util.Properties)
115     *      
116     * {@inheritDoc}
117     */
118    @Override
119    public void initialize( final Engine engine, final Properties properties) throws NoRequiredPropertyException, IOException {
120        final String configuredLimit = properties.getProperty( PROP_UNCHANGED_CONTEXT_LIMIT, Integer.toString( LIMIT_MAX_VALUE ) );
121        int limit = LIMIT_MAX_VALUE;
122        try {
123            limit = Integer.parseInt( configuredLimit );
124        } catch( final NumberFormatException e ) {
125            LOG.warn("Failed to parseInt " + PROP_UNCHANGED_CONTEXT_LIMIT + "=" + configuredLimit + " Will use a huge number as limit.", e );
126        }
127        m_unchangedContextLimit = limit;
128    }
129
130
131
132    /**
133     * Do a colored diff of the two regions. This. is. serious. fun. ;-)
134     *
135     * @see org.apache.wiki.diff.DiffProvider#makeDiffHtml(Context, String, String)
136     * 
137     * {@inheritDoc}
138     */
139    @Override
140    public synchronized String makeDiffHtml( final Context ctx, final String wikiOld, final String wikiNew ) {
141        //
142        // Sequencing handles lineterminator to <br /> and every-other consequtive space to a &nbsp;
143        //
144        final String[] alpha = sequence( TextUtil.replaceEntities( wikiOld ) );
145        final String[] beta  = sequence( TextUtil.replaceEntities( wikiNew ) );
146
147        final Revision rev;
148        try {
149            rev = Diff.diff( alpha, beta, new MyersDiff() );
150        } catch( final DifferentiationFailedException dfe ) {
151            LOG.error( "Diff generation failed", dfe );
152            return "Error while creating version diff.";
153        }
154
155        final int revSize = rev.size();
156        final StringBuffer sb = new StringBuffer();
157
158        sb.append( DIFF_START );
159
160        //
161        // The MyersDiff is a bit dumb by converting a single line multi-word diff into a series
162        // of Changes. The ChangeMerger pulls them together again...
163        //
164        final ChangeMerger cm = new ChangeMerger( sb, alpha, revSize );
165        rev.accept( cm );
166        cm.shutdown();
167        sb.append( DIFF_END );
168        return sb.toString();
169    }
170
171    /**
172     * Take the string and create an array from it, split it first on newlines, making
173     * sure to preserve the newlines in the elements, split each resulting element on
174     * spaces, preserving the spaces.
175     *
176     * All this preseving of newlines and spaces is so the wikitext when diffed will have fidelity
177     * to it's original form.  As a side affect we see edits of purely whilespace.
178     */
179    private String[] sequence( final String wikiText ) {
180        final String[] linesArray = Diff.stringToArray( wikiText );
181        final List< String > list = new ArrayList<>();
182        for( final String line : linesArray ) {
183
184            String lastToken = null;
185            String token;
186            // StringTokenizer might be discouraged but it still is perfect here...
187            for( final StringTokenizer st = new StringTokenizer( line, " ", true ); st.hasMoreTokens(); ) {
188                token = st.nextToken();
189
190                if( " ".equals( lastToken ) && " ".equals( token ) ) {
191                    token = ALTERNATING_SPACE_HTML;
192                }
193
194                list.add( token );
195                lastToken = token;
196            }
197
198            list.add( LINE_BREAK_HTML ); // Line Break
199        }
200
201        return list.toArray( new String[ 0 ] );
202    }
203
204    /**
205     * This helper class does the housekeeping for merging
206     * our various changes down and also makes sure that the
207     * whole change process is threadsafe by encapsulating
208     * all necessary variables.
209     */
210    private final class ChangeMerger implements RevisionVisitor {
211        private final StringBuffer m_sb;
212
213        /** Keeping score of the original lines to process */
214        private final int m_max;
215
216        private int m_index;
217
218        /** Index of the next element to be copied into the output. */
219        private int m_firstElem;
220
221        /** Link Anchor counter */
222        private int m_count = 1;
223
224        /** State Machine Mode */
225        private int m_mode = -1; /* -1: Unset, 0: Add, 1: Del, 2: Change mode */
226
227        /** Buffer to coalesce the changes together */
228        private StringBuffer m_origBuf;
229
230        private StringBuffer m_newBuf;
231
232        /** Reference to the source string array */
233        private final String[] m_origStrings;
234
235        private ChangeMerger( final StringBuffer sb, final String[] origStrings, final int max ) {
236            m_sb = sb;
237            m_origStrings = origStrings != null ? origStrings.clone() : null;
238            m_max = max;
239
240            m_origBuf = new StringBuffer();
241            m_newBuf = new StringBuffer();
242        }
243
244        private void updateState( final Delta delta ) {
245            m_index++;
246            final Chunk orig = delta.getOriginal();
247            if( orig.first() > m_firstElem ) {
248                // We "skip" some lines in the output.
249                // So flush out the last Change, if one exists.
250                flushChanges();
251
252                // Allow us to "skip" large swaths of unchanged text, show a "limited" amound of
253                // unchanged context so the changes are shown in
254                if( ( orig.first() - m_firstElem ) > 2 * m_unchangedContextLimit ) {
255                    if (m_firstElem > 0) {
256                        final int endIndex = Math.min( m_firstElem + m_unchangedContextLimit, m_origStrings.length -1 );
257
258                        m_sb.append(Arrays.stream(m_origStrings, m_firstElem, endIndex).collect(Collectors.joining("", "", ELIDED_TAIL_INDICATOR_HTML)));
259
260                    }
261
262                    m_sb.append( ELIDED_HEAD_INDICATOR_HTML );
263
264                    final int startIndex = Math.max(orig.first() - m_unchangedContextLimit, 0);
265                    m_sb.append(Arrays.stream(m_origStrings, startIndex, orig.first()).collect(Collectors.joining()));
266
267                } else {
268                    // No need to skip anything, just output the whole range...
269                    m_sb.append(Arrays.stream(m_origStrings, m_firstElem, orig.first()).collect(Collectors.joining()));
270                }
271            }
272            m_firstElem = orig.last() + 1;
273        }
274
275        @Override
276        public void visit( final Revision rev ) {
277            // GNDN (Goes nowhere, does nothing)
278        }
279
280        @Override
281        public void visit( final AddDelta delta ) {
282            updateState( delta );
283
284            // We have run Deletes up to now. Flush them out.
285            if( m_mode == 1 ) {
286                flushChanges();
287                m_mode = -1;
288            }
289            // We are in "neutral mode". Start a new Change
290            if( m_mode == -1 ) {
291                m_mode = 0;
292            }
293
294            // We are in "add mode".
295            if( m_mode == 0 || m_mode == 2 ) {
296                addNew( delta.getRevised() );
297                m_mode = 1;
298            }
299        }
300
301        @Override
302        public void visit( final ChangeDelta delta ) {
303            updateState( delta );
304
305            // We are in "neutral mode". A Change might be merged with an add or delete.
306            if( m_mode == -1 ) {
307                m_mode = 2;
308            }
309
310            // Add the Changes to the buffers.
311            addOrig( delta.getOriginal() );
312            addNew( delta.getRevised() );
313        }
314
315        @Override
316        public void visit( final DeleteDelta delta ) {
317            updateState( delta );
318
319            // We have run Adds up to now. Flush them out.
320            if( m_mode == 0 ) {
321                flushChanges();
322                m_mode = -1;
323            }
324            // We are in "neutral mode". Start a new Change
325            if( m_mode == -1 ) {
326                m_mode = 1;
327            }
328
329            // We are in "delete mode".
330            if( m_mode == 1 || m_mode == 2 ) {
331                addOrig( delta.getOriginal() );
332                m_mode = 1;
333            }
334        }
335
336        public void shutdown() {
337            m_index = m_max + 1; // Make sure that no hyperlink gets created
338            flushChanges();
339
340            if( m_firstElem < m_origStrings.length ) {
341                // If there's more than the limit of the orginal left just emit limit and elided...
342                if( ( m_origStrings.length - m_firstElem ) > m_unchangedContextLimit ) {
343                    final int endIndex = Math.min( m_firstElem + m_unchangedContextLimit, m_origStrings.length -1 );
344                    m_sb.append(Arrays.stream(m_origStrings, m_firstElem, endIndex).collect(Collectors.joining("", "", ELIDED_TAIL_INDICATOR_HTML)));
345
346                } else {
347                // emit entire tail of original...
348                    m_sb.append(Arrays.stream(m_origStrings, m_firstElem, m_origStrings.length).collect(Collectors.joining()));
349                }
350            }
351        }
352
353        private void addOrig( final Chunk chunk ) {
354            if( chunk != null ) {
355                chunk.toString( m_origBuf );
356            }
357        }
358
359        private void addNew( final Chunk chunk ) {
360            if( chunk != null ) {
361                chunk.toString( m_newBuf );
362            }
363        }
364
365        private void flushChanges() {
366            if( m_newBuf.length() + m_origBuf.length() > 0 ) {
367                // This is the span element which encapsulates anchor and the change itself
368                m_sb.append( CHANGE_START_HTML );
369
370                // Do we want to have a "back link"?
371                if( m_emitChangeNextPreviousHyperlinks && m_count > 1 ) {
372                    m_sb.append( BACK_PRE_INDEX );
373                    m_sb.append( m_count - 1 );
374                    m_sb.append( BACK_POST_INDEX );
375                }
376
377                // An anchor for the change.
378                if (m_emitChangeNextPreviousHyperlinks) {
379                    m_sb.append( ANCHOR_PRE_INDEX );
380                    m_sb.append( m_count++ );
381                    m_sb.append( ANCHOR_POST_INDEX );
382                }
383
384                // ... has been added
385                if( m_newBuf.length() > 0 ) {
386                    m_sb.append( INSERTION_START_HTML );
387                    m_sb.append( m_newBuf );
388                    m_sb.append( INSERTION_END_HTML );
389                }
390
391                // .. has been removed
392                if( m_origBuf.length() > 0 ) {
393                    m_sb.append( DELETION_START_HTML );
394                    m_sb.append( m_origBuf );
395                    m_sb.append( DELETION_END_HTML );
396                }
397
398                // Do we want a "forward" link?
399                if( m_emitChangeNextPreviousHyperlinks && (m_index < m_max) ) {
400                    m_sb.append( FORWARD_PRE_INDEX );
401                    m_sb.append( m_count ); // Has already been incremented.
402                    m_sb.append( FORWARD_POST_INDEX );
403                }
404
405                m_sb.append( CHANGE_END_HTML );
406
407                // Nuke the buffers.
408                m_origBuf = new StringBuffer();
409                m_newBuf = new StringBuffer();
410            }
411
412            // After a flush, everything is reset.
413            m_mode = -1;
414        }
415    }
416
417}