001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import java.io.UnsupportedEncodingException;
022import java.nio.charset.StandardCharsets;
023import java.security.MessageDigest;
024import java.security.NoSuchAlgorithmException;
025import java.text.SimpleDateFormat;
026import java.util.ArrayList;
027import java.util.Calendar;
028import java.util.HashMap;
029import java.util.Map;
030import java.util.Properties;
031import java.util.Set;
032import java.util.regex.Matcher;
033import java.util.regex.Pattern;
034
035/**
036 * <p>Translates Creole markup to JSPWiki markup. Simple translator uses regular expressions.
037 * See http://www.wikicreole.org for the WikiCreole spec.</p>
038 *
039 * <p>This translator can be configured through properties defined in
040 * jspwiki.properties starting with "creole.*". See the
041 * jspwiki.properties file for an explanation of the properties</p>
042 *
043 * <p><b>WARNING</b>: This is an experimental feature, and known to be
044 * broken.  Use at your own risk.</p>
045 *
046 *
047 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
048 */
049public class CreoleToJSPWikiTranslator
050{
051
052    // These variables are expanded so that admins
053    // can display information about the current installed
054    // pagefilter
055    //
056    // The syntax is the same as a wiki var. Unlike a wiki
057    // war though, the CreoleTranslator itself
058    //
059    // [{$creolepagefilter.version}]
060    // [{$creolepagefilter.creoleversion}]
061    // [{$creolepagefilter.linebreak}] -> bloglike/wikilike
062
063    /** The version of the filter. */
064    public static final String VAR_VERSION = "1.0.3";
065
066    /** The version of Creole that this filter supports. */
067    public static final String VAR_CREOLE_VERSION = "1.0";
068
069    /** The linebreak style "bloglike". */
070    public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike";
071
072    /** The linebreak style "c2like". */
073    public static final String VAR_LINEBREAK_C2LIKE = "c2like";
074
075    private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";
076
077    private static final String JSPWIKI_BOLD = "__$1__$3";
078
079    private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";
080
081    private static final String JSPWIKI_ITALIC = "''$1''$3";
082
083    private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";
084
085    private static final String JSPWIKI_SIMPLELINK = "[$1]";
086
087    private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";
088
089    private static final String JSPWIKI_LINK = "[$2|$1]";
090
091    private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";
092
093    private static final String JSPWIKI_HEADER_0 = "$1!!!$2";
094
095    private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";
096
097    private static final String JSPWIKI_HEADER_1 = "$1!!!$2";
098
099    private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";
100
101    private static final String JSPWIKI_HEADER_2 = "$1!!$2";
102
103    private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";
104
105    private static final String JSPWIKI_HEADER_3 = "$1!$2";
106
107    private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";
108
109    private static final String JSPWIKI_HEADER_4 = "$1__$2__";
110
111    private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";
112
113    private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";
114
115    private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";
116
117    private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";
118
119    private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";
120
121    private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";
122
123    private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";
124
125    private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";
126
127    private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";
128
129    //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])";
130
131    //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2";
132
133    private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";
134
135    private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";
136
137    private static final String JSPWIKI_PLUGIN = "[{$1}]";
138
139    private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";
140
141    private static final String HTTP_URL = "$1http://$2$3";
142
143    private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";
144
145    private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";
146
147    private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]";
148
149    private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";
150
151    private static final String JSPWIKI_TABLE = "$1$2$4";
152
153    /* TODO Is it possible to use just protect :// ? */
154    private static final String URL_PROTECTED = "http://|ftp://|https://";
155
156    private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";
157
158    private static final String SIGNATURE = "--~~~";
159
160    private static final String SIGNATURE_AND_DATE = "--~~~~";
161
162    private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";
163
164    private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";
165
166    private static final Map<String, String> c_protectionMap = new HashMap<>();
167
168    private        ArrayList<String> m_hashList = new ArrayList<>();
169
170    /**
171     *  I have no idea what this method does.  Could someone please tell me?
172     *
173     * @param wikiProps A property set
174     * @param content The content to translate?
175     * @param username The username in the signature?
176     * @return Probably some translated content.
177     */
178    public String translateSignature(final Properties wikiProps, final String content, final String username)
179    {
180
181        String dateFormat = wikiProps.getProperty("creole.dateFormat");
182
183        if (dateFormat == null)
184        {
185            dateFormat = DEFAULT_DATEFORMAT;
186        }
187
188        SimpleDateFormat df;
189        try
190        {
191            df = new SimpleDateFormat(dateFormat);
192        }
193        catch (final Exception e)
194        {
195            e.printStackTrace();
196            df = new SimpleDateFormat(DEFAULT_DATEFORMAT);
197        }
198
199        String result = content;
200        result = protectMarkup(result, PREFORMATTED_PROTECTED, "", "");
201        result = protectMarkup(result, URL_PROTECTED, "", "");
202
203        final Calendar cal = Calendar.getInstance();
204        result = translateElement(result, SIGNATURE_AND_DATE, "-- [[" + username + "]], " + df.format(cal.getTime()));
205        result = translateElement(result, SIGNATURE, "-- [[" + username + "]]");
206        result = unprotectMarkup(result, false);
207        return result;
208    }
209
210    /**
211     *  Translates Creole markup to JSPWiki markup
212     *
213     *  @param wikiProps A set of Wiki Properties
214     *  @param content Creole markup
215     *  @return Wiki markup
216     */
217    public String translate(final Properties wikiProps, final String content)
218    {
219        final boolean blogLineBreaks = false;
220        /*
221        // BROKEN, breaks on different platforms.
222        String tmp = wikiProps.getProperty("creole.blogLineBreaks");
223        if (tmp != null)
224        {
225            if (tmp.trim().equals("true"))
226                blogLineBreaks = true;
227        }
228        */
229        final String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name");
230
231        String result = content;
232        //
233        // Breaks on OSX.  It is never a good idea to tamper with the linebreaks.  JSPWiki always
234        // stores linebreaks as \r\n, regardless of the platform.
235        //result = result.replace("\r\n", "\n");
236        //result = result.replace("\r", "\n");
237
238        /* Now protect the rest */
239        result = protectMarkup(result);
240        result = translateLists(result, "*", "-", "Nothing");
241        result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
242        result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
243        result = translateElement(result, WWW_URL, HTTP_URL);
244
245        if (imagePlugin != null && !imagePlugin.equals(""))
246        {
247            result = this.replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin);
248            result = this.replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
249        }
250        result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC);
251        result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK);
252        result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
253        result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK);
254        result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4);
255        result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3);
256        result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2);
257        result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1);
258        result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0);
259        result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
260        result = translateLists(result, "-", "*", "#");
261        result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE);
262        result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
263        result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");
264
265        /*
266        if (blogLineBreaks)
267        {
268            result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS);
269        }
270        */
271        result = unprotectMarkup(result, true);
272
273        result = translateVariables(result, blogLineBreaks);
274        //result = result.replace("\n", System.getProperty("line.separator"));
275        return result;
276    }
277
278    /** Translates lists. */
279    private static String translateLists(final String content, final String sourceSymbol, final String targetSymbol, final String sourceSymbol2)
280    {
281        final String[] lines = content.split("\n");
282        final StringBuilder result = new StringBuilder();
283        int counter = 0;
284        int inList = -1;
285        for (int i = 0; i < lines.length; i++)
286        {
287            String line = lines[i];
288            String actSourceSymbol = "";
289            while ((line.startsWith(sourceSymbol) || line.startsWith(sourceSymbol2))
290                   && (actSourceSymbol.equals("") || line.substring(0, 1).equals(actSourceSymbol)))
291            {
292                actSourceSymbol = line.substring(0, 1);
293                line = line.substring( 1 );
294                counter++;
295            }
296            if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter))
297            {
298                result.append(actSourceSymbol.repeat(Math.max(0, counter)));
299                inList = -1;
300            }
301            else
302            {
303                for (int c = 0; c < counter; c++)
304                {
305                    if (actSourceSymbol.equals(sourceSymbol2))
306                    {
307                        result.append(sourceSymbol2);
308                    }
309                    else
310                    {
311                        result.append(targetSymbol);
312                    }
313                }
314                inList = counter;
315            }
316            result.append(line);
317            if (i < lines.length - 1)
318            {
319                result.append("\n");
320            }
321            counter = 0;
322        }
323
324        // Fixes testExtensions5
325        if( content.endsWith( "\n" ) && result.charAt( result.length()-1 ) != '\n' )
326        {
327            result.append( '\n' );
328        }
329
330        return result.toString();
331    }
332
333    private String translateVariables(String result, final boolean blogLineBreaks)
334    {
335        result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION);
336        result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION);
337        final String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE;
338        result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks);
339        return result;
340    }
341
342    /**
343     * Undoes the protection. This is done by replacing the md5 hashes by the
344     * original markup.
345     *
346     * @see #protectMarkup(String)
347     */
348    private String unprotectMarkup(String content, final boolean replacePlugins)
349    {
350        final Object[] it = this.m_hashList.toArray();
351
352        for (int i = it.length - 1; i >= 0; i--)
353        {
354            final String hash = (String) it[i];
355            final String protectedMarkup = c_protectionMap.get(hash);
356            content = content.replace(hash, protectedMarkup);
357            if ((protectedMarkup.length() < 3 || (protectedMarkup.length() > 2 &&
358                !protectedMarkup.startsWith("{{{")))&&replacePlugins)
359                content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN);
360
361        }
362        return content;
363    }
364
365    /**
366     * Protects markup that should not be processed. For now this includes:
367     * <ul>
368     * <li>Preformatted sections, they should be ignored</li>
369     * </li>
370     * <li>Protocol strings like <code>http://</code>, they cause problems
371     * because of the <code>//</code> which is interpreted as italic</li>
372     * </ul>
373     * This protection is a simple method to keep the regular expressions for
374     * the other markup simple. Internally the protection is done by replacing
375     * the protected markup with the the md5 hash of the markup.
376     *
377     * @param content
378     * @return The content with protection
379     */
380    private String protectMarkup(String content)
381    {
382        c_protectionMap.clear();
383        m_hashList = new ArrayList<>();
384        content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
385        content = protectMarkup(content, URL_PROTECTED, "", "");
386        content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
387        content = protectMarkup(content, CREOLE_PLUGIN, "", "");
388
389        // content = protectMarkup(content, LINE_PROTECTED);
390        // content = protectMarkup(content, SIGNATURE_PROTECTED);
391        return content;
392    }
393
394    private ArrayList< String[] > readPlaceholderProperties(final Properties wikiProps)
395    {
396        final Set< Object > keySet = wikiProps.keySet();
397        final Object[] keys = keySet.toArray();
398        final ArrayList<String[]> result = new ArrayList<>();
399
400        for (final Object o : keys) {
401            final String key = o + "";
402            final String value = wikiProps.getProperty(o + "");
403            if (key.contains("creole.imagePlugin.para.%")) {
404                final String[] pair = new String[2];
405                pair[0] = key.replaceAll("creole.imagePlugin.para.%", "");
406                pair[1] = value;
407                result.add(pair);
408            }
409        }
410        return result;
411    }
412
413    private String replaceImageArea(final Properties wikiProps, final String content, final String markupRegex, final String replaceContent, final int groupPos,
414                                    final String imagePlugin)
415    {
416        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
417        String contentCopy = content;
418
419        final ArrayList< String[] > plProperties = readPlaceholderProperties(wikiProps);
420
421        while (matcher.find())
422        {
423            String protectedMarkup = matcher.group(0);
424            final String paramsField = matcher.group(groupPos);
425            final StringBuilder paramsString = new StringBuilder();
426
427            if (paramsField != null)
428            {
429                final String[] params = paramsField.split(",");
430
431                for (final String s : params) {
432                    final String param = s.replaceAll("\\||\\s", "").toUpperCase();
433
434                    // Replace placeholder params
435                    for (final String[] pair : plProperties) {
436                        final String key = pair[0];
437                        final String value = pair[1];
438                        String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>");
439                        code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2");
440                        if (!code.equals(param)) {
441                            paramsString.append(code);
442                        }
443                    }
444
445                    // Check if it is a number
446                    try {
447                        Integer.parseInt(param);
448                        paramsString.append(" width='").append(param).append("px'");
449                    } catch (final Exception e) {
450
451                        if (wikiProps.getProperty("creole.imagePlugin.para." + param) != null)
452                            paramsString.append(" ").append(wikiProps.getProperty("creole.imagePlugin.para." + param)
453                                    .replaceAll("^(\"|')(.*)(\"|')$", "$2"));
454                    }
455                }
456            }
457            final String temp = protectedMarkup;
458
459            protectedMarkup = translateElement(protectedMarkup, markupRegex, replaceContent);
460            protectedMarkup = protectedMarkup.replaceAll("\u2015", paramsString.toString());
461            protectedMarkup = protectedMarkup.replaceAll("\u2016", imagePlugin);
462            protectedMarkup = protectedMarkup.replaceAll("caption=''", "");
463            protectedMarkup = protectedMarkup.replaceAll("\\s+", " ");
464
465            final int pos = contentCopy.indexOf(temp);
466            contentCopy = contentCopy.substring(0, pos) + protectedMarkup
467                          + contentCopy.substring(pos + temp.length());
468        }
469        return contentCopy;
470    }
471
472    private String replaceArea(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
473    {
474        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
475        String contentCopy = content;
476
477        while (matcher.find())
478        {
479            String protectedMarkup = matcher.group(0);
480            final String temp = protectedMarkup;
481            protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
482            final int pos = contentCopy.indexOf(temp);
483            contentCopy = contentCopy.substring(0, pos) + protectedMarkup
484                          + contentCopy.substring(pos + temp.length() );
485        }
486        return contentCopy;
487    }
488
489    /**
490     * Protects a specific markup
491     *
492     * @see #protectMarkup(String)
493     */
494    private String protectMarkup(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
495    {
496        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
497        final StringBuffer result = new StringBuffer();
498        while (matcher.find())
499        {
500            String protectedMarkup = matcher.group();
501            protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
502            try
503            {
504                final MessageDigest digest = MessageDigest.getInstance("MD5");
505                digest.reset();
506                digest.update(protectedMarkup.getBytes(StandardCharsets.UTF_8));
507                final String hash = bytesToHash(digest.digest());
508                matcher.appendReplacement(result, hash);
509                c_protectionMap.put(hash, protectedMarkup);
510                m_hashList.add(hash);
511            }
512            catch (final NoSuchAlgorithmException e)
513            {
514                // FIXME: Should log properly
515                e.printStackTrace();
516            }
517        }
518        matcher.appendTail(result);
519        return result.toString();
520    }
521
522    private String bytesToHash(final byte[] b)
523    {
524        final StringBuilder hash = new StringBuilder();
525        for (final byte value : b) {
526            hash.append(Integer.toString((value & 0xff) + 0x100, 16).substring(1));
527        }
528        return hash.toString();
529    }
530
531    private String translateElement(final String content, final String fromMarkup, final String toMarkup)
532    {
533        final Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content);
534        final StringBuffer result = new StringBuffer();
535
536        while (matcher.find())
537        {
538            matcher.appendReplacement(result, toMarkup);
539        }
540        matcher.appendTail(result);
541        return result.toString();
542    }
543}