001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
034
/**
 * <p>Translates Creole markup to JSPWiki markup. This is a simple translator based on
 * regular expressions. See http://www.wikicreole.org for the WikiCreole spec.</p>
 *
 * <p>This translator can be configured through properties defined in
 * jspwiki.properties starting with "creole.*". See the
 * jspwiki.properties file for an explanation of the properties.</p>
 *
 * <p>An instance keeps the placeholders of the translation in progress in its fields, so a
 * single instance must not be used by several threads at the same time. Separate instances
 * do not share any mutable state.</p>
 *
 * <p><b>WARNING</b>: This is an experimental feature, and known to be
 * broken.  Use at your own risk.</p>
 *
 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
 */
public class CreoleToJSPWikiTranslator
{

    // The following variables are expanded so that admins can display
    // information about the currently installed page filter.
    //
    // The syntax is the same as for a wiki variable. Unlike a wiki variable,
    // though, they are expanded by this translator itself (see translateVariables):
    //
    // [{$creolepagefilter.version}]
    // [{$creolepagefilter.creoleversion}]
    // [{$creolepagefilter.linebreak}] -> bloglike/c2like

    /** The version of the filter. */
    public static final String VAR_VERSION = "1.0.3";

    /** The version of Creole that this filter supports. */
    public static final String VAR_CREOLE_VERSION = "1.0";

    /** The linebreak style "bloglike". */
    public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike";

    /** The linebreak style "c2like". */
    public static final String VAR_LINEBREAK_C2LIKE = "c2like";

    private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";

    private static final String JSPWIKI_BOLD = "__$1__$3";

    private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";

    private static final String JSPWIKI_ITALIC = "''$1''$3";

    private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";

    private static final String JSPWIKI_SIMPLELINK = "[$1]";

    private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";

    private static final String JSPWIKI_LINK = "[$2|$1]";

    private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";

    private static final String JSPWIKI_HEADER_0 = "$1!!!$2";

    private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";

    private static final String JSPWIKI_HEADER_1 = "$1!!!$2";

    private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";

    private static final String JSPWIKI_HEADER_2 = "$1!!$2";

    private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";

    private static final String JSPWIKI_HEADER_3 = "$1!$2";

    private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";

    private static final String JSPWIKI_HEADER_4 = "$1__$2__";

    private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";

    private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";

    private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";

    private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";

    private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";

    private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";

    private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";

    private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";

    private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";

    // NOTE: a "blog-like" line break translation used to exist here (CREOLE_LINEBREAKS /
    // JSPWIKI_LINEBREAKS). It was disabled because it broke on different platforms.

    private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";

    private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";

    private static final String JSPWIKI_PLUGIN = "[{$1}]";

    private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";

    private static final String HTTP_URL = "$1http://$2$3";

    private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";

    // In the two *_X templates U+2016 stands for the image plugin name and U+2015 for the
    // generated plugin parameters; replaceImageArea() substitutes both.
    private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";

    private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]";

    private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";

    private static final String JSPWIKI_TABLE = "$1$2$4";

    /* TODO Is it possible to use just protect :// ? */
    private static final String URL_PROTECTED = "http://|ftp://|https://";

    private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";

    private static final String SIGNATURE = "--~~~";

    private static final String SIGNATURE_AND_DATE = "--~~~~";

    private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";

    private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";

    /** Prefix of the property keys that define placeholder image parameters. */
    private static final String PLACEHOLDER_PARAM_PREFIX = "creole.imagePlugin.para.%";

    /**
     * Maps each placeholder hash to the markup it replaced. This is deliberately an instance
     * field: a shared static map would be cleared by one translation while another one still
     * needs its entries.
     */
    private final Map<String, String> m_protectionMap = new HashMap<>();

    /** Placeholder hashes in the order in which they were created. */
    private final List<String> m_hashList = new ArrayList<>();

    /**
     * Expands the Creole signature shortcuts in the given content. <code>--~~~</code> becomes
     * <code>-- [[username]]</code> and <code>--~~~~</code> becomes
     * <code>-- [[username]], date</code>. Preformatted blocks (<code>{{{ ... }}}</code>) and
     * URL protocol prefixes are left untouched.
     *
     * @param wikiProps A property set; <code>creole.dateFormat</code> may hold a
     *        {@link SimpleDateFormat} pattern for the date (default <code>yyyy-MM-dd</code>,
     *        also used when the configured pattern is invalid)
     * @param content The content in which the signatures are expanded
     * @param username The user name to insert into the signature; it is inserted literally
     * @return The content with all signature shortcuts expanded
     */
    public String translateSignature(final Properties wikiProps, final String content, final String username)
    {
        final SimpleDateFormat df = createDateFormat(wikiProps.getProperty("creole.dateFormat", DEFAULT_DATEFORMAT));

        resetProtection();
        String result = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
        result = protectMarkup(result, URL_PROTECTED, "", "");

        // The user name and the date are data, not replacement templates: quote them so that
        // '$' and '\' are neither interpreted as group references nor dropped.
        final String signature = "-- [[" + Matcher.quoteReplacement(String.valueOf(username)) + "]]";
        final String date = Matcher.quoteReplacement(df.format(Calendar.getInstance().getTime()));

        // The longer shortcut has to be handled first, because the shorter one is its prefix.
        result = translateElement(result, SIGNATURE_AND_DATE, signature + ", " + date);
        result = translateElement(result, SIGNATURE, signature);
        return unprotectMarkup(result, false);
    }

    /**
     *  Translates Creole markup to JSPWiki markup.
     *
     *  @param wikiProps A set of Wiki Properties; <code>creole.imagePlugin.name</code> and
     *         <code>creole.imagePlugin.para.*</code> configure the image translation
     *  @param content Creole markup
     *  @return Wiki markup
     */
    public String translate(final Properties wikiProps, final String content)
    {
        // Blog-like line breaks ("creole.blogLineBreaks") are disabled: the implementation
        // broke on different platforms. It is never a good idea to tamper with the line
        // breaks, JSPWiki always stores them as \r\n regardless of the platform.
        final boolean blogLineBreaks = false;
        final String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name");

        /* Protect everything that must not be translated. */
        String result = protectMarkup(content);

        // Temporarily turn list stars into dashes so that they are not taken for bold markers.
        result = translateLists(result, "*", "-", null);
        result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
        result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
        result = translateElement(result, WWW_URL, HTTP_URL);

        if (imagePlugin != null && !imagePlugin.isEmpty())
        {
            result = replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin);
            result = replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
        }
        result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC);
        result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK);
        result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
        result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK);
        // Headers are translated from the deepest level upwards, so that a short run of '='
        // never consumes the beginning of a longer one.
        result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4);
        result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3);
        result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2);
        result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1);
        result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0);
        result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
        // Turn the dashes back into JSPWiki list stars; '#' lists are kept as they are.
        result = translateLists(result, "-", "*", "#");
        result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE);
        result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
        result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");

        result = unprotectMarkup(result, true);
        return translateVariables(result, blogLineBreaks);
    }

    /**
     * Creates the date format for signatures.
     *
     * @param pattern The configured {@link SimpleDateFormat} pattern
     * @return A format for the pattern, or for the default pattern if the given one is invalid
     */
    private static SimpleDateFormat createDateFormat(final String pattern)
    {
        try
        {
            return new SimpleDateFormat(pattern);
        }
        catch (final IllegalArgumentException ignored)
        {
            // A misconfigured pattern must not prevent the page from being saved.
            return new SimpleDateFormat(DEFAULT_DATEFORMAT);
        }
    }

    /**
     * Rewrites the list markers at the beginning of the lines of the content.
     *
     * <p>A run of identical marker characters at the start of a line is counted as the list
     * depth. The run is translated if it continues a list (depth 1 outside of a list, at most
     * one level deeper inside of a list); otherwise it is written back unchanged, as it is
     * probably not a list item (for example bold text at the start of a line).</p>
     *
     * @param content The content to process; lines are separated by <code>\n</code>
     * @param sourceSymbol The one-character marker that is replaced by the target symbol
     * @param targetSymbol The replacement for the source symbol
     * @param sourceSymbol2 A second one-character marker that takes part in the depth
     *        tracking but is written back unchanged, or <code>null</code> if there is none
     * @return The content with the list markers rewritten
     */
    private static String translateLists(final String content, final String sourceSymbol, final String targetSymbol, final String sourceSymbol2)
    {
        final boolean hasSecondary = sourceSymbol2 != null && !sourceSymbol2.isEmpty();
        final String[] lines = content.split("\n");
        final StringBuilder result = new StringBuilder();
        int inList = -1;
        for (int i = 0; i < lines.length; i++)
        {
            String line = lines[i];
            String actSourceSymbol = "";
            int counter = 0;

            // Strip the leading run of one and the same marker character and count its length.
            while ((line.startsWith(sourceSymbol) || (hasSecondary && line.startsWith(sourceSymbol2)))
                   && (actSourceSymbol.isEmpty() || line.startsWith(actSourceSymbol)))
            {
                actSourceSymbol = line.substring(0, 1);
                line = line.substring(1);
                counter++;
            }

            final boolean notAListItem = (inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter);
            final String outputSymbol;
            if (notAListItem)
            {
                outputSymbol = actSourceSymbol;
                inList = -1;
            }
            else
            {
                outputSymbol = (hasSecondary && actSourceSymbol.equals(sourceSymbol2)) ? sourceSymbol2 : targetSymbol;
                inList = counter;
            }
            for (int c = 0; c < counter; c++)
            {
                result.append(outputSymbol);
            }

            result.append(line);
            if (i < lines.length - 1)
            {
                result.append("\n");
            }
        }

        // split() drops trailing empty strings, so a trailing line break has to be restored.
        // The result is empty if the content consisted of line breaks only.
        if (content.endsWith("\n") && (result.length() == 0 || result.charAt(result.length() - 1) != '\n'))
        {
            result.append('\n');
        }

        return result.toString();
    }

    /**
     * Expands the <code>[{$creolepagefilter.*}]</code> variables.
     *
     * @param result The content in which the variables are expanded
     * @param blogLineBreaks Whether blog-like line breaks are in use
     * @return The content with the variables replaced by their values
     */
    private String translateVariables(String result, final boolean blogLineBreaks)
    {
        final String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE;
        result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION);
        result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION);
        result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks);
        return result;
    }

    /** Forgets all placeholders of a previous translation. */
    private void resetProtection()
    {
        m_protectionMap.clear();
        m_hashList.clear();
    }

    /**
     * Undoes the protection. This is done by replacing the md5 hashes by the
     * original markup, newest placeholder first.
     *
     * @param content The content containing the placeholders
     * @param replacePlugins Whether Creole plugin markup (<code>&lt;&lt; ... &gt;&gt;</code>)
     *        is translated to JSPWiki plugin markup after a placeholder has been restored;
     *        this is never done right after restoring a preformatted block
     * @return The content with the original markup restored
     * @see #protectMarkup(String)
     */
    private String unprotectMarkup(String content, final boolean replacePlugins)
    {
        for (int i = m_hashList.size() - 1; i >= 0; i--)
        {
            final String hash = m_hashList.get(i);
            final String protectedMarkup = m_protectionMap.get(hash);
            if (protectedMarkup == null)
            {
                continue;
            }
            content = content.replace(hash, protectedMarkup);
            if (replacePlugins && !protectedMarkup.startsWith("{{{"))
            {
                content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN);
            }
        }
        return content;
    }

    /**
     * Protects markup that should not be processed. For now this includes:
     * <ul>
     * <li>Preformatted sections, they should be ignored</li>
     * <li>Protocol strings like <code>http://</code>, they cause problems
     * because of the <code>//</code> which is interpreted as italic</li>
     * <li>Markup escaped with <code>~</code></li>
     * <li>Plugin calls (<code>&lt;&lt; ... &gt;&gt;</code>)</li>
     * </ul>
     * This protection is a simple method to keep the regular expressions for
     * the other markup simple. Internally the protection is done by replacing
     * the protected markup with the md5 hash of the markup. Placeholders of a
     * previous translation are discarded.
     *
     * @param content The content to protect
     * @return The content with protection
     */
    private String protectMarkup(String content)
    {
        resetProtection();
        content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
        content = protectMarkup(content, URL_PROTECTED, "", "");
        content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
        content = protectMarkup(content, CREOLE_PLUGIN, "", "");
        return content;
    }

    /**
     * Collects the placeholder image parameters, that is all properties whose key contains
     * <code>creole.imagePlugin.para.%</code>.
     *
     * @param wikiProps The wiki properties
     * @return A list of pairs; element 0 is the key without the prefix (the unit suffix),
     *         element 1 is the property value (the parameter template)
     */
    private List<String[]> readPlaceholderProperties(final Properties wikiProps)
    {
        final List<String[]> result = new ArrayList<>();

        // Iterate over a snapshot of the keys, the properties may be shared with other threads.
        for (final Object rawKey : wikiProps.keySet().toArray())
        {
            final String key = String.valueOf(rawKey);
            if (key.contains(PLACEHOLDER_PARAM_PREFIX))
            {
                result.add(new String[] { key.replace(PLACEHOLDER_PARAM_PREFIX, ""), wikiProps.getProperty(key) });
            }
        }
        return result;
    }

    /**
     * Translates extended Creole image markup to a call of the configured image plugin.
     *
     * @param wikiProps The wiki properties holding the <code>creole.imagePlugin.para.*</code> settings
     * @param content The content to process
     * @param markupRegex The regular expression that finds the image markup
     * @param replaceContent The replacement template; U+2016 in it is replaced by the plugin
     *        name, U+2015 by the generated parameters
     * @param groupPos The group of the regular expression that holds the comma-separated
     *        image parameters
     * @param imagePlugin The name of the image plugin
     * @return The content with the image markup translated
     */
    private String replaceImageArea(final Properties wikiProps, final String content, final String markupRegex, final String replaceContent, final int groupPos,
                                    final String imagePlugin)
    {
        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        final List<String[]> plProperties = readPlaceholderProperties(wikiProps);
        final StringBuffer result = new StringBuffer();

        while (matcher.find())
        {
            final String params = buildImageParams(wikiProps, plProperties, matcher.group(groupPos));

            String markup = translateElement(matcher.group(), markupRegex, replaceContent);
            // Literal replacements: the parameters and the plugin name come from the
            // configuration and must not be interpreted as regex replacement templates.
            markup = markup.replace("\u2015", params);
            markup = markup.replace("\u2016", imagePlugin);
            markup = markup.replace("caption=''", "");
            markup = markup.replaceAll("\\s+", " ");

            // Splice the translation in at the exact position of the match.
            matcher.appendReplacement(result, Matcher.quoteReplacement(markup));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Builds the plugin parameters for the parameter section of an image markup.
     *
     * @param wikiProps The wiki properties holding the <code>creole.imagePlugin.para.*</code> settings
     * @param plProperties The placeholder parameters, see {@link #readPlaceholderProperties(Properties)}
     * @param paramsField The comma-separated parameters of the markup, may be <code>null</code>
     * @return The parameters to insert into the plugin call, possibly empty
     */
    private static String buildImageParams(final Properties wikiProps, final List<String[]> plProperties, final String paramsField)
    {
        if (paramsField == null)
        {
            return "";
        }

        final StringBuilder paramsString = new StringBuilder();
        for (final String rawParam : paramsField.split(","))
        {
            // Locale.ROOT keeps the property lookup independent of the default locale.
            final String param = rawParam.replaceAll("\\||\\s", "").toUpperCase(Locale.ROOT);

            // Placeholder parameters: a number followed by a configured unit suffix. The
            // number is moved to the position of the '%' in the configured template.
            for (final String[] pair : plProperties)
            {
                String code = param.replaceAll("(?i)([0-9]+)" + pair[0], pair[1] + "<check>" + "$1" + "</check>");
                code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2");
                if (!code.equals(param))
                {
                    paramsString.append(code);
                }
            }

            if (isInteger(param))
            {
                // A plain number is a width in pixels.
                paramsString.append(" width='").append(param).append("px'");
            }
            else
            {
                final String configured = wikiProps.getProperty("creole.imagePlugin.para." + param);
                if (configured != null)
                {
                    // Surrounding quotes of the configured value are stripped.
                    paramsString.append(" ").append(configured.replaceAll("^(\"|')(.*)(\"|')$", "$2"));
                }
            }
        }
        return paramsString.toString();
    }

    /** Tells whether the given text can be parsed by {@link Integer#parseInt(String)}. */
    private static boolean isInteger(final String text)
    {
        try
        {
            Integer.parseInt(text);
            return true;
        }
        catch (final NumberFormatException ignored)
        {
            // Not a number, the caller treats the text as a named parameter.
            return false;
        }
    }

    /**
     * Applies a regular expression replacement inside of all areas matched by another
     * regular expression.
     *
     * @param content The content to process
     * @param markupRegex The regular expression that finds the areas
     * @param replaceSource The regular expression applied inside of each area
     * @param replaceTarget The replacement template for <code>replaceSource</code>
     * @return The content with the replacement applied inside of the matched areas
     */
    private String replaceArea(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
    {
        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        final StringBuffer result = new StringBuffer();

        while (matcher.find())
        {
            final String replaced = matcher.group().replaceAll(replaceSource, replaceTarget);
            // Splice the result in at the exact position of the match.
            matcher.appendReplacement(result, Matcher.quoteReplacement(replaced));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Protects a specific markup: every match of the regular expression is replaced by the
     * md5 hash of the matched markup, and the markup is remembered for
     * {@link #unprotectMarkup(String, boolean)}.
     *
     * @param content The content to protect
     * @param markupRegex The regular expression that finds the markup to protect
     * @param replaceSource A regular expression applied to the markup before it is stored
     * @param replaceTarget The replacement template for <code>replaceSource</code>
     * @return The content with the matched markup replaced by placeholders
     * @throws IllegalStateException if the platform provides no MD5 implementation
     * @see #protectMarkup(String)
     */
    private String protectMarkup(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
    {
        final MessageDigest digest;
        try
        {
            digest = MessageDigest.getInstance("MD5");
        }
        catch (final NoSuchAlgorithmException e)
        {
            // Without placeholders the translation would mangle preformatted text and
            // URLs, so failing is better than silently continuing unprotected.
            throw new IllegalStateException("MD5 is not available, cannot protect markup", e);
        }

        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        final StringBuffer result = new StringBuffer();
        while (matcher.find())
        {
            final String protectedMarkup = matcher.group().replaceAll(replaceSource, replaceTarget);
            // digest(byte[]) also resets the digest, so it can be reused for the next match.
            final String hash = bytesToHash(digest.digest(protectedMarkup.getBytes(StandardCharsets.UTF_8)));
            // The hash consists of hex digits only, so it is safe as a replacement template.
            matcher.appendReplacement(result, hash);
            m_protectionMap.put(hash, protectedMarkup);
            m_hashList.add(hash);
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Converts a digest to its lower-case hexadecimal representation, two digits per byte.
     *
     * @param b The bytes of the digest
     * @return The hexadecimal representation
     */
    private static String bytesToHash(final byte[] b)
    {
        final StringBuilder hash = new StringBuilder(b.length * 2);
        for (final byte value : b)
        {
            // Adding 0x100 guarantees three hex digits, of which the first is dropped; this
            // keeps the leading zero of small values.
            hash.append(Integer.toString((value & 0xff) + 0x100, 16).substring(1));
        }
        return hash.toString();
    }

    /**
     * Replaces all matches of a regular expression (compiled in multi-line mode).
     *
     * @param content The content to process
     * @param fromMarkup The regular expression to find
     * @param toMarkup The replacement template; <code>$n</code> refers to group n
     * @return The content with all matches replaced
     */
    private String translateElement(final String content, final String fromMarkup, final String toMarkup)
    {
        return Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content).replaceAll(toMarkup);
    }
}