001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import java.io.UnsupportedEncodingException;
022import java.nio.charset.StandardCharsets;
023import java.security.MessageDigest;
024import java.security.NoSuchAlgorithmException;
025import java.text.SimpleDateFormat;
026import java.util.ArrayList;
027import java.util.Calendar;
028import java.util.HashMap;
029import java.util.Map;
030import java.util.Properties;
031import java.util.Set;
032import java.util.regex.Matcher;
033import java.util.regex.Pattern;
034
/**
 * <p>Translates Creole markup to JSPWiki markup. Simple translator uses regular expressions.
 * See http://www.wikicreole.org for the WikiCreole spec.</p>
 *
 * <p>This translator can be configured through properties defined in
 * jspwiki.properties starting with "creole.*". See the
 * jspwiki.properties file for an explanation of the properties</p>
 *
 * <p><b>WARNING</b>: This is an experimental feature, and known to be
 * broken.  Use at your own risk.</p>
 *
 * <p>Instances keep the state of the translation in progress (the protected markup and its
 * hashes), so a single instance must not be used by several threads at the same time.
 * Separate instances do not share any mutable state.</p>
 *
 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
 */
public class CreoleToJSPWikiTranslator
{

    // These variables are expanded so that admins
    // can display information about the current installed
    // pagefilter
    //
    // The syntax is the same as a wiki var. Unlike a wiki
    // var though, they are expanded by the CreoleTranslator itself
    // (see translateVariables).
    //
    // [{$creolepagefilter.version}]
    // [{$creolepagefilter.creoleversion}]
    // [{$creolepagefilter.linebreak}] -> bloglike/wikilike

    /** The version of the filter. */
    public static final String VAR_VERSION = "1.0.3";

    /** The version of Creole that this filter supports. */
    public static final String VAR_CREOLE_VERSION = "1.0";

    /** The linebreak style "bloglike". */
    public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike";

    /** The linebreak style "c2like". */
    public static final String VAR_LINEBREAK_C2LIKE = "c2like";

    private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";

    private static final String JSPWIKI_BOLD = "__$1__$3";

    private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";

    private static final String JSPWIKI_ITALIC = "''$1''$3";

    private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";

    private static final String JSPWIKI_SIMPLELINK = "[$1]";

    private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";

    private static final String JSPWIKI_LINK = "[$2|$1]";

    private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";

    private static final String JSPWIKI_HEADER_0 = "$1!!!$2";

    private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";

    private static final String JSPWIKI_HEADER_1 = "$1!!!$2";

    private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";

    private static final String JSPWIKI_HEADER_2 = "$1!!$2";

    private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";

    private static final String JSPWIKI_HEADER_3 = "$1!$2";

    private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";

    private static final String JSPWIKI_HEADER_4 = "$1__$2__";

    private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";

    private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";

    private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";

    private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";

    private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";

    private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";

    private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";

    private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";

    private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";

    private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";

    private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";

    private static final String JSPWIKI_PLUGIN = "[{$1}]";

    private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";

    private static final String HTTP_URL = "$1http://$2$3";

    private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";

    // \u2016 and \u2015 are placeholders that replaceImageArea() substitutes with the
    // configured image plugin name and the translated parameter list respectively.
    private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";

    private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]";

    private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";

    private static final String JSPWIKI_TABLE = "$1$2$4";

    /* TODO Is it possible to use just protect :// ? */
    private static final String URL_PROTECTED = "http://|ftp://|https://";

    private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";

    private static final String SIGNATURE = "--~~~";

    private static final String SIGNATURE_AND_DATE = "--~~~~";

    private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";

    private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";

    /** Property key prefix that marks a placeholder image parameter. */
    private static final String PLACEHOLDER_PARAM_PREFIX = "creole.imagePlugin.para.%";

    /**
     * Maps the hash that stands in for a protected piece of markup to the markup itself.
     * Kept per instance so that translations running on different instances cannot
     * clear or overwrite each other's entries.
     */
    private final Map<String, String> m_protectionMap = new HashMap<>();

    /** Hashes in the order in which they were put into the content. */
    private ArrayList<String> m_hashList = new ArrayList<>();

    /**
     *  Expands the Creole signature shortcuts in the given content. <code>--~~~~</code> becomes
     *  <code>-- [[username]], date</code> and <code>--~~~</code> becomes
     *  <code>-- [[username]]</code>. Preformatted sections and URL protocol prefixes are left alone.
     *
     *  @param wikiProps A property set; <code>creole.dateFormat</code> may hold a
     *                   {@link SimpleDateFormat} pattern (default <code>yyyy-MM-dd</code>)
     *  @param content The content in which the signatures are expanded
     *  @param username The user name to put into the signature; it is inserted literally
     *  @return The content with expanded signatures
     */
    public String translateSignature(final Properties wikiProps, final String content, final String username)
    {
        String dateFormat = wikiProps.getProperty("creole.dateFormat");
        if (dateFormat == null)
        {
            dateFormat = DEFAULT_DATEFORMAT;
        }

        SimpleDateFormat df;
        try
        {
            df = new SimpleDateFormat(dateFormat);
        }
        catch (final IllegalArgumentException e)
        {
            // The configured pattern is invalid: report it and fall back to the default.
            e.printStackTrace();
            df = new SimpleDateFormat(DEFAULT_DATEFORMAT);
        }

        resetProtection();
        String result = content;
        result = protectMarkup(result, PREFORMATTED_PROTECTED, "", "");
        result = protectMarkup(result, URL_PROTECTED, "", "");

        // The signature text is data, not a replacement pattern: quote it so that '$' and '\'
        // in the user name or the formatted date are not interpreted as group references.
        final String signature = "-- [[" + username + "]]";
        final String signatureAndDate = signature + ", " + df.format(Calendar.getInstance().getTime());
        result = translateElement(result, SIGNATURE_AND_DATE, Matcher.quoteReplacement(signatureAndDate));
        result = translateElement(result, SIGNATURE, Matcher.quoteReplacement(signature));
        result = unprotectMarkup(result, false);
        return result;
    }

    /**
     *  Translates Creole markup to JSPWiki markup
     *
     *  @param wikiProps A set of Wiki Properties
     *  @param content Creole markup
     *  @return Wiki markup
     */
    public String translate(final Properties wikiProps, final String content)
    {
        // Blog-like line breaks are disabled: the implementation broke on different platforms.
        final boolean blogLineBreaks = false;
        final String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name");

        // Line breaks are deliberately left untouched; normalizing them broke on OSX.
        String result = content;

        /* Now protect the rest */
        result = protectMarkup(result);
        // Turn "*" list markers into "-" for the time being so that they are not taken for bold
        // markup. There is no second list symbol in this pass, hence null.
        result = translateLists(result, "*", "-", null);
        result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
        result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
        result = translateElement(result, WWW_URL, HTTP_URL);

        if (imagePlugin != null && !imagePlugin.isEmpty())
        {
            result = replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin);
            result = replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
        }
        result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC);
        result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK);
        result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
        result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK);
        // Longest header marker first, otherwise "=" would already match "=====".
        result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4);
        result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3);
        result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2);
        result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1);
        result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0);
        result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
        // Second list pass: "-" markers become "*", "#" markers are kept.
        result = translateLists(result, "-", "*", "#");
        result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE);
        result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
        result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");

        result = unprotectMarkup(result, true);

        result = translateVariables(result, blogLineBreaks);
        return result;
    }

    /**
     * Translates the list markers at the start of each line.
     *
     * <p>A run of identical marker characters at the start of a line is a list item if it is a
     * single marker, or if it is at most one level deeper than the list item on the previous
     * line. Markers of such items are rewritten, any other run is copied unchanged.</p>
     *
     * @param content the text to process; lines are separated by <code>\n</code>
     * @param sourceSymbol single-character marker that is replaced by <code>targetSymbol</code>
     * @param targetSymbol the replacement for <code>sourceSymbol</code>
     * @param sourceSymbol2 single-character marker that takes part in the nesting logic but is
     *                      written back unchanged, or <code>null</code> if there is none
     * @return the content with translated list markers
     */
    private static String translateLists(final String content, final String sourceSymbol, final String targetSymbol, final String sourceSymbol2)
    {
        final String[] lines = content.split("\n");
        final StringBuilder result = new StringBuilder();
        int inList = -1;
        for (int i = 0; i < lines.length; i++)
        {
            String line = lines[i];
            String actSourceSymbol = "";
            int counter = 0;
            // Strip the leading run of one and the same marker character and count its length.
            while ((line.startsWith(sourceSymbol) || (sourceSymbol2 != null && line.startsWith(sourceSymbol2)))
                   && (actSourceSymbol.isEmpty() || line.substring(0, 1).equals(actSourceSymbol)))
            {
                actSourceSymbol = line.substring(0, 1);
                line = line.substring(1);
                counter++;
            }

            final boolean startsTooDeep = inList == -1 && counter != 1;
            final boolean skipsLevel = inList != -1 && inList + 1 < counter;
            if (startsTooDeep || skipsLevel)
            {
                // Not a list item (for example bold markup at line start): restore the markers.
                for (int c = 0; c < counter; c++)
                {
                    result.append(actSourceSymbol);
                }
                inList = -1;
            }
            else
            {
                final String marker = actSourceSymbol.equals(sourceSymbol2) ? sourceSymbol2 : targetSymbol;
                for (int c = 0; c < counter; c++)
                {
                    result.append(marker);
                }
                inList = counter;
            }
            result.append(line);
            if (i < lines.length - 1)
            {
                result.append("\n");
            }
        }

        // split() drops trailing empty lines, so put back the final line break (fixes
        // testExtensions5). The result is empty when the content consists of line breaks only.
        if (content.endsWith("\n") && (result.length() == 0 || result.charAt(result.length() - 1) != '\n'))
        {
            result.append('\n');
        }

        return result.toString();
    }

    /**
     * Expands the <code>[{$creolepagefilter.*}]</code> variables.
     *
     * @param result the text in which the variables are expanded
     * @param blogLineBreaks whether the blog-like line break style is reported
     * @return the text with expanded variables
     */
    private String translateVariables(String result, final boolean blogLineBreaks)
    {
        result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION);
        result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION);
        final String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE;
        result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks);
        return result;
    }

    /**
     * Undoes the protection. This is done by replacing the md5 hashes by the
     * original markup, last protected markup first.
     *
     * @param content the content that contains the hashes
     * @param replacePlugins whether Creole plugin markup is translated to JSPWiki plugin markup
     *                       after markup other than a preformatted section has been restored
     * @return the content with the original markup
     * @see #protectMarkup(String)
     */
    private String unprotectMarkup(String content, final boolean replacePlugins)
    {
        for (int i = m_hashList.size() - 1; i >= 0; i--)
        {
            final String hash = m_hashList.get(i);
            final String protectedMarkup = m_protectionMap.get(hash);
            if (protectedMarkup == null)
            {
                continue;
            }
            content = content.replace(hash, protectedMarkup);
            // Preformatted sections are restored last and must stay untouched.
            if (replacePlugins && !protectedMarkup.startsWith("{{{"))
            {
                content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN);
            }
        }
        return content;
    }

    /**
     * Protects markup that should not be processed. For now this includes:
     * <ul>
     * <li>Preformatted sections, they should be ignored</li>
     * <li>Protocol strings like <code>http://</code>, they cause problems
     * because of the <code>//</code> which is interpreted as italic</li>
     * <li>Markup escaped with <code>~</code></li>
     * <li>Plugin markup (<code>&lt;&lt;...&gt;&gt;</code>)</li>
     * </ul>
     * This protection is a simple method to keep the regular expressions for
     * the other markup simple. Internally the protection is done by replacing
     * the protected markup with the md5 hash of the markup.
     *
     * @param content the content to protect
     * @return The content with protection
     */
    private String protectMarkup(String content)
    {
        resetProtection();
        content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
        content = protectMarkup(content, URL_PROTECTED, "", "");
        content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
        content = protectMarkup(content, CREOLE_PLUGIN, "", "");
        return content;
    }

    /** Forgets all markup protected by an earlier translation. */
    private void resetProtection()
    {
        m_protectionMap.clear();
        m_hashList = new ArrayList<>();
    }

    /**
     * Collects the placeholder image parameters, i.e. the properties whose key contains
     * <code>creole.imagePlugin.para.%</code>.
     *
     * @param wikiProps the properties to search
     * @return pairs of key (without the prefix) and property value
     */
    private ArrayList< String[] > readPlaceholderProperties(final Properties wikiProps)
    {
        final Set< Object > keySet = wikiProps.keySet();
        final ArrayList<String[]> result = new ArrayList<>();

        for (final Object o : keySet.toArray())
        {
            final String key = String.valueOf(o);
            if (key.contains(PLACEHOLDER_PARAM_PREFIX))
            {
                // The prefix is plain text, not a regular expression.
                result.add(new String[] { key.replace(PLACEHOLDER_PARAM_PREFIX, ""), wikiProps.getProperty(key) });
            }
        }
        return result;
    }

    /**
     * Translates extended Creole image markup to a call of the configured image plugin.
     *
     * @param wikiProps properties that define the image parameters (<code>creole.imagePlugin.para.*</code>)
     * @param content the content to process
     * @param markupRegex the regular expression that matches the image markup
     * @param replaceContent the replacement pattern; <code>\u2016</code> stands for the plugin name
     *                       and <code>\u2015</code> for the translated parameters
     * @param groupPos the group of <code>markupRegex</code> that holds the comma-separated parameters
     * @param imagePlugin the name of the image plugin
     * @return the content with translated image markup
     */
    private String replaceImageArea(final Properties wikiProps, final String content, final String markupRegex, final String replaceContent, final int groupPos,
                                    final String imagePlugin)
    {
        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        String contentCopy = content;

        final ArrayList< String[] > plProperties = readPlaceholderProperties(wikiProps);

        while (matcher.find())
        {
            final String original = matcher.group(0);
            final String paramsField = matcher.group(groupPos);
            final StringBuilder paramsString = new StringBuilder();

            if (paramsField != null)
            {
                for (final String s : paramsField.split(","))
                {
                    // Property keys are upper case; use a fixed locale so that the lookup
                    // does not depend on the default locale of the platform.
                    final String param = s.replaceAll("\\||\\s", "").toUpperCase(java.util.Locale.ROOT);

                    // Replace placeholder params
                    for (final String[] pair : plProperties)
                    {
                        final String key = pair[0];
                        final String value = pair[1];
                        String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>");
                        code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2");
                        if (!code.equals(param))
                        {
                            paramsString.append(code);
                        }
                    }

                    // A plain number is the width, anything else is looked up in the properties.
                    try
                    {
                        Integer.parseInt(param);
                        paramsString.append(" width='").append(param).append("px'");
                    }
                    catch (final NumberFormatException e)
                    {
                        final String mapped = wikiProps.getProperty("creole.imagePlugin.para." + param);
                        if (mapped != null)
                        {
                            paramsString.append(" ").append(mapped.replaceAll("^(\"|')(.*)(\"|')$", "$2"));
                        }
                    }
                }
            }

            String translated = translateElement(original, markupRegex, replaceContent);
            // Plain text substitutions: the inserted values must not be read as replacement patterns.
            translated = translated.replace("\u2015", paramsString.toString());
            translated = translated.replace("\u2016", imagePlugin);
            translated = translated.replace("caption=''", "");
            translated = translated.replaceAll("\\s+", " ");

            contentCopy = replaceFirstLiteral(contentCopy, original, translated);
        }
        return contentCopy;
    }

    /**
     * Applies a regular expression replacement inside the areas matched by another regular expression.
     *
     * @param content the content to process
     * @param markupRegex the regular expression that selects the areas
     * @param replaceSource the regular expression that is replaced inside an area
     * @param replaceTarget the replacement pattern for <code>replaceSource</code>
     * @return the content with the replacements applied
     */
    private String replaceArea(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
    {
        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        String contentCopy = content;

        while (matcher.find())
        {
            final String original = matcher.group(0);
            contentCopy = replaceFirstLiteral(contentCopy, original, original.replaceAll(replaceSource, replaceTarget));
        }
        return contentCopy;
    }

    /** Replaces the first occurrence of target in text; returns text unchanged if there is none. */
    private static String replaceFirstLiteral(final String text, final String target, final String replacement)
    {
        final int pos = text.indexOf(target);
        if (pos < 0)
        {
            return text;
        }
        return text.substring(0, pos) + replacement + text.substring(pos + target.length());
    }

    /**
     * Protects a specific markup: every match of the regular expression is replaced by the md5
     * hash of the matched text, and the hash is remembered so that the text can be restored.
     *
     * @param content the content to protect
     * @param markupRegex the regular expression that matches the markup to protect
     * @param replaceSource regular expression that is replaced inside the matched markup before it is stored
     * @param replaceTarget the replacement pattern for <code>replaceSource</code>
     * @return the content with the matches replaced by hashes
     * @see #protectMarkup(String)
     */
    private String protectMarkup(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
    {
        // MD5 only produces a placeholder token here, it is not used for security.
        final MessageDigest digest;
        try
        {
            digest = MessageDigest.getInstance("MD5");
        }
        catch (final NoSuchAlgorithmException e)
        {
            // FIXME: Should log properly. Without a digest nothing can be protected.
            e.printStackTrace();
            return content;
        }

        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        final StringBuffer result = new StringBuffer();
        while (matcher.find())
        {
            final String protectedMarkup = matcher.group().replaceAll(replaceSource, replaceTarget);
            // digest(byte[]) hashes the bytes and resets the digest for the next match.
            final String hash = bytesToHash(digest.digest(protectedMarkup.getBytes(StandardCharsets.UTF_8)));
            matcher.appendReplacement(result, hash);
            m_protectionMap.put(hash, protectedMarkup);
            m_hashList.add(hash);
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /** Formats the bytes as lower-case hexadecimal text, two digits per byte. */
    private static String bytesToHash(final byte[] b)
    {
        final StringBuilder hash = new StringBuilder(b.length * 2);
        for (final byte value : b)
        {
            // Adding 0x100 and dropping the leading digit keeps the leading zero of small values.
            hash.append(Integer.toString((value & 0xff) + 0x100, 16).substring(1));
        }
        return hash.toString();
    }

    /**
     * Replaces every match of a regular expression.
     *
     * @param content the content to process
     * @param fromMarkup the regular expression to look for (compiled with {@link Pattern#MULTILINE})
     * @param toMarkup the replacement pattern; <code>$n</code> refers to group n of the match
     * @return the content with all matches replaced
     */
    private String translateElement(final String content, final String fromMarkup, final String toMarkup)
    {
        return Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content).replaceAll(toMarkup);
    }
}