001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.parser;
020
021import java.io.UnsupportedEncodingException;
022import java.security.MessageDigest;
023import java.security.NoSuchAlgorithmException;
024import java.text.SimpleDateFormat;
025import java.util.ArrayList;
026import java.util.Calendar;
027import java.util.HashMap;
028import java.util.Map;
029import java.util.Properties;
030import java.util.Set;
031import java.util.regex.Matcher;
032import java.util.regex.Pattern;
033
034/**
035 * <p>Translates Creole markp to JSPWiki markup. Simple translator uses regular expressions.
036 * See http://www.wikicreole.org for the WikiCreole spec.</p>
037 *
038 * <p>This translator can be configured through properties defined in
039 * jspwiki.properties starting with "creole.*". See the
040 * jspwiki.properties file for an explanation of the properties</p>
041 *
042 * <p><b>WARNING</b>: This is an experimental feature, and known to be
043 * broken.  Use at your own risk.</o>
044 *
045 *
046 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
047 */
048public class CreoleToJSPWikiTranslator
049{
050
051    // These variables are expanded so that admins
052    // can display information about the current installed
053    // pagefilter
054    //
055    // The syntax is the same as a wiki var. Unlike a wiki
056    // war though, the CreoleTranslator itself
057    //
058    // [{$creolepagefilter.version}]
059    // [{$creolepagefilter.creoleversion}]
060    // [{$creolepagefilter.linebreak}] -> bloglike/wikilike
061
062    /** The version of the filter. */
063    public static final String VAR_VERSION = "1.0.3";
064
065    /** The version of Creole that this filter supports. */
066    public static final String VAR_CREOLE_VERSION = "1.0";
067
068    /** The linebreak style "bloglike". */
069    public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike";
070
071    /** The linebreak style "c2like". */
072    public static final String VAR_LINEBREAK_C2LIKE = "c2like";
073
074    private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";
075
076    private static final String JSPWIKI_BOLD = "__$1__$3";
077
078    private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";
079
080    private static final String JSPWIKI_ITALIC = "''$1''$3";
081
082    private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";
083
084    private static final String JSPWIKI_SIMPLELINK = "[$1]";
085
086    private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";
087
088    private static final String JSPWIKI_LINK = "[$2|$1]";
089
090    private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";
091
092    private static final String JSPWIKI_HEADER_0 = "$1!!!$2";
093
094    private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";
095
096    private static final String JSPWIKI_HEADER_1 = "$1!!!$2";
097
098    private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";
099
100    private static final String JSPWIKI_HEADER_2 = "$1!!$2";
101
102    private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";
103
104    private static final String JSPWIKI_HEADER_3 = "$1!$2";
105
106    private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";
107
108    private static final String JSPWIKI_HEADER_4 = "$1__$2__";
109
110    private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";
111
112    private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";
113
114    private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";
115
116    private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";
117
118    private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";
119
120    private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";
121
122    private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";
123
124    private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";
125
126    private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";
127
128    //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])";
129
130    //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2";
131
132    private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";
133
134    private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";
135
136    private static final String JSPWIKI_PLUGIN = "[{$1}]";
137
138    private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";
139
140    private static final String HTTP_URL = "$1http://$2$3";
141
142    private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";
143
144    private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";
145
146    private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]";
147
148    private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";
149
150    private static final String JSPWIKI_TABLE = "$1$2$4";
151
152    /* TODO Is it possible to use just protect :// ? */
153    private static final String URL_PROTECTED = "http://|ftp://|https://";
154
155    private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";
156
157    private static final String SIGNATURE = "--~~~";
158
159    private static final String SIGNATURE_AND_DATE = "--~~~~";
160
161    private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";
162
163    private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";
164
165    private static Map<String, String> c_protectionMap = new HashMap<String, String>();
166
167    private        ArrayList<String> m_hashList = new ArrayList<String>();
168
169    /**
170     *  I have no idea what this method does.  Could someone please tell me?
171     *
172     * @param wikiProps A property set
173     * @param content The content to translate?
174     * @param username The username in the signature?
175     * @return Probably some translated content.
176     */
177    public String translateSignature(Properties wikiProps, final String content, String username)
178    {
179
180        String dateFormat = wikiProps.getProperty("creole.dateFormat");
181
182        if (dateFormat == null)
183        {
184            dateFormat = DEFAULT_DATEFORMAT;
185        }
186
187        SimpleDateFormat df = null;
188        try
189        {
190            df = new SimpleDateFormat(dateFormat);
191        }
192        catch (Exception e)
193        {
194            e.printStackTrace();
195            df = new SimpleDateFormat(DEFAULT_DATEFORMAT);
196        }
197
198        String result = content;
199        result = protectMarkup(result, PREFORMATTED_PROTECTED, "", "");
200        result = protectMarkup(result, URL_PROTECTED, "", "");
201
202        Calendar cal = Calendar.getInstance();
203        result = translateElement(result, SIGNATURE_AND_DATE, "-- [[" + username + "]], " + df.format(cal.getTime()));
204        result = translateElement(result, SIGNATURE, "-- [[" + username + "]]");
205        result = unprotectMarkup(result, false);
206        return result;
207    }
208
209    /**
210     *  Translates Creole markup to JSPWiki markup
211     *
212     *  @param wikiProps A set of Wiki Properties
213     *  @param content Creole markup
214     *  @return Wiki markup
215     */
216    public String translate(Properties wikiProps, final String content)
217    {
218        boolean blogLineBreaks = false;
219        /*
220        // BROKEN, breaks on different platforms.
221        String tmp = wikiProps.getProperty("creole.blogLineBreaks");
222        if (tmp != null)
223        {
224            if (tmp.trim().equals("true"))
225                blogLineBreaks = true;
226        }
227        */
228        String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name");
229
230        String result = content;
231        //
232        // Breaks on OSX.  It is never a good idea to tamper with the linebreaks.  JSPWiki always
233        // stores linebreaks as \r\n, regardless of the platform.
234        //result = result.replace("\r\n", "\n");
235        //result = result.replace("\r", "\n");
236
237        /* Now protect the rest */
238        result = protectMarkup(result);
239        result = translateLists(result, "*", "-", "Nothing");
240        result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
241        result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
242        result = translateElement(result, WWW_URL, HTTP_URL);
243
244        if (imagePlugin != null && !imagePlugin.equals(""))
245        {
246            result = this.replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin);
247            result = this.replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
248        }
249        result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC);
250        result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK);
251        result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
252        result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK);
253        result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4);
254        result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3);
255        result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2);
256        result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1);
257        result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0);
258        result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
259        result = translateLists(result, "-", "*", "#");
260        result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE);
261        result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
262        result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");
263
264        /*
265        if (blogLineBreaks)
266        {
267            result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS);
268        }
269        */
270        result = unprotectMarkup(result, true);
271
272        result = translateVariables(result, blogLineBreaks);
273        //result = result.replace("\n", System.getProperty("line.separator"));
274        return result;
275    }
276
277    /** Translates lists. */
278    private static String translateLists(String content, String sourceSymbol, String targetSymbol, String sourceSymbol2)
279    {
280        String[] lines = content.split("\n");
281        StringBuilder result = new StringBuilder();
282        int counter = 0;
283        int inList = -1;
284        for (int i = 0; i < lines.length; i++)
285        {
286            String line = lines[i];
287            String actSourceSymbol = "";
288            while ((line.startsWith(sourceSymbol) || line.startsWith(sourceSymbol2))
289                   && (actSourceSymbol.equals("") || line.substring(0, 1).equals(actSourceSymbol)))
290            {
291                actSourceSymbol = line.substring(0, 1);
292                line = line.substring(1, line.length());
293                counter++;
294            }
295            if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter))
296            {
297                for (int c = 0; c < counter; c++)
298                {
299                    result.append(actSourceSymbol);
300                }
301                inList = -1;
302            }
303            else
304            {
305                for (int c = 0; c < counter; c++)
306                {
307                    if (actSourceSymbol.equals(sourceSymbol2))
308                    {
309                        result.append(sourceSymbol2);
310                    }
311                    else
312                    {
313                        result.append(targetSymbol);
314                    }
315                }
316                inList = counter;
317            }
318            result.append(line);
319            if (i < lines.length - 1)
320            {
321                result.append("\n");
322            }
323            counter = 0;
324        }
325
326        // Fixes testExtensions5
327        if( content.endsWith( "\n" ) && result.charAt( result.length()-1 ) != '\n' )
328        {
329            result.append( '\n' );
330        }
331
332        return result.toString();
333    }
334
335    private String translateVariables(String result, boolean blogLineBreaks)
336    {
337        result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION);
338        result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION);
339        String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE;
340        result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks);
341        return result;
342    }
343
344    /**
345     * Undoes the protection. This is done by replacing the md5 hashes by the
346     * original markup.
347     *
348     * @see #protectMarkup(String)
349     */
350    private String unprotectMarkup(String content,boolean replacePlugins)
351    {
352        Object[] it = this.m_hashList.toArray();
353
354        for (int i = it.length - 1; i >= 0; i--)
355        {
356            String hash = (String) it[i];
357            String protectedMarkup = c_protectionMap.get(hash);
358            content = content.replace(hash, protectedMarkup);
359            if ((protectedMarkup.length() < 3 || (protectedMarkup.length() > 2 &&
360                !protectedMarkup.substring(0, 3).equals("{{{")))&&replacePlugins)
361                content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN);
362
363        }
364        return content;
365    }
366
367    /**
368     * Protects markup that should not be processed. For now this includes:
369     * <ul>
370     * <li>Preformatted sections, they should be ignored</li>
371     * </li>
372     * <li>Protocol strings like <code>http://</code>, they cause problems
373     * because of the <code>//</code> which is interpreted as italic</li>
374     * </ul>
375     * This protection is a simple method to keep the regular expressions for
376     * the other markup simple. Internally the protection is done by replacing
377     * the protected markup with the the md5 hash of the markup.
378     *
379     * @param content
380     * @return The content with protection
381     */
382    private String protectMarkup(String content)
383    {
384        c_protectionMap.clear();
385        m_hashList = new ArrayList<String>();
386        content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
387        content = protectMarkup(content, URL_PROTECTED, "", "");
388        content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
389        content = protectMarkup(content, CREOLE_PLUGIN, "", "");
390
391        // content = protectMarkup(content, LINE_PROTECTED);
392        // content = protectMarkup(content, SIGNATURE_PROTECTED);
393        return content;
394    }
395
396    private ArrayList< String[] > readPlaceholderProperties(Properties wikiProps)
397    {
398        Set< Object > keySet = wikiProps.keySet();
399        Object[] keys = keySet.toArray();
400        ArrayList<String[]> result = new ArrayList<String[]>();
401
402        for( int i = 0; i < keys.length; i++ )
403        {
404            String key = keys[i] + "";
405            String value = wikiProps.getProperty( keys[i] + "" );
406            if( key.indexOf( "creole.imagePlugin.para.%" ) > -1 )
407            {
408                String[] pair = new String[2];
409                pair[0] = key.replaceAll( "creole.imagePlugin.para.%", "" );
410                pair[1] = value;
411                result.add( pair );
412            }
413        }
414        return result;
415    }
416
417    private String replaceImageArea(Properties wikiProps, String content, String markupRegex, String replaceContent, int groupPos,
418                                    String imagePlugin)
419    {
420        Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
421        String contentCopy = content;
422
423        ArrayList< String[] > plProperties = readPlaceholderProperties(wikiProps);
424
425        while (matcher.find())
426        {
427            String protectedMarkup = matcher.group(0);
428            String paramsField = matcher.group(groupPos);
429            String paramsString = "";
430
431            if (paramsField != null)
432            {
433                String[] params = paramsField.split(",");
434
435                for (int i = 0; i < params.length; i++)
436                {
437                    String param = params[i].replaceAll("\\||\\s", "").toUpperCase();
438
439                    // Replace placeholder params
440                    for (int j = 0; j < plProperties.size(); j++)
441                    {
442                        String[] pair = plProperties.get(j);
443                        String key = pair[0];
444                        String value = pair[1];
445                        String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>");
446                        code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2");
447                        if (!code.equals(param)) {
448                            paramsString += code;
449                        }
450                    }
451
452                    // Check if it is a number
453                    try
454                    {
455                        Integer.parseInt(param);
456                        paramsString += " width='" + param + "px'";
457                    }
458                    catch (Exception e)
459                    {
460
461                        if (wikiProps.getProperty("creole.imagePlugin.para." + param) != null)
462                            paramsString += " "
463                                            + wikiProps.getProperty("creole.imagePlugin.para." + param)
464                                                .replaceAll("^(\"|')(.*)(\"|')$", "$2");
465                    }
466                }
467            }
468            String temp = protectedMarkup;
469
470            protectedMarkup = translateElement(protectedMarkup, markupRegex, replaceContent);
471            protectedMarkup = protectedMarkup.replaceAll("\u2015", paramsString);
472            protectedMarkup = protectedMarkup.replaceAll("\u2016", imagePlugin);
473            protectedMarkup = protectedMarkup.replaceAll("caption=''", "");
474            protectedMarkup = protectedMarkup.replaceAll("\\s+", " ");
475
476            int pos = contentCopy.indexOf(temp);
477            contentCopy = contentCopy.substring(0, pos) + protectedMarkup
478                          + contentCopy.substring(pos + temp.length(), contentCopy.length());
479        }
480        return contentCopy;
481    }
482
483    private String replaceArea(String content, String markupRegex, String replaceSource, String replaceTarget)
484    {
485        Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
486        String contentCopy = content;
487
488        while (matcher.find())
489        {
490            String protectedMarkup = matcher.group(0);
491            String temp = protectedMarkup;
492            protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
493            int pos = contentCopy.indexOf(temp);
494            contentCopy = contentCopy.substring(0, pos) + protectedMarkup
495                          + contentCopy.substring(pos + temp.length(), contentCopy.length());
496        }
497        return contentCopy;
498    }
499
500    /**
501     * Protects a specific markup
502     *
503     * @see #protectMarkup(String)
504     */
505    private String protectMarkup(String content, String markupRegex, String replaceSource, String replaceTarget)
506    {
507        Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
508        StringBuffer result = new StringBuffer();
509        while (matcher.find())
510        {
511            String protectedMarkup = matcher.group();
512            protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
513            try
514            {
515                MessageDigest digest = MessageDigest.getInstance("MD5");
516                digest.reset();
517                digest.update(protectedMarkup.getBytes("UTF-8"));
518                String hash = bytesToHash(digest.digest());
519                matcher.appendReplacement(result, hash);
520                c_protectionMap.put(hash, protectedMarkup);
521                m_hashList.add(hash);
522            }
523            catch (NoSuchAlgorithmException e)
524            {
525                // FIXME: Should log properly
526                e.printStackTrace();
527            }
528            catch (UnsupportedEncodingException e)
529            {
530                // FIXME: Auto-generated catch block
531                e.printStackTrace();
532            }
533        }
534        matcher.appendTail(result);
535        return result.toString();
536    }
537
538    private String bytesToHash(byte[] b)
539    {
540        String hash = "";
541        for (int i = 0; i < b.length; i++)
542        {
543            hash += Integer.toString((b[i] & 0xff) + 0x100, 16).substring(1);
544        }
545        return hash;
546    }
547
548    private String translateElement(String content, String fromMarkup, String toMarkup)
549    {
550        Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content);
551        StringBuffer result = new StringBuffer();
552
553        while (matcher.find())
554        {
555            matcher.appendReplacement(result, toMarkup);
556        }
557        matcher.appendTail(result);
558        return result.toString();
559    }
560}