001    /* 
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.  
018     */
019    package org.apache.wiki.parser;
020    
021    import java.io.UnsupportedEncodingException;
022    import java.security.MessageDigest;
023    import java.security.NoSuchAlgorithmException;
024    import java.text.SimpleDateFormat;
025    import java.util.ArrayList;
026    import java.util.Calendar;
027    import java.util.HashMap;
028    import java.util.Map;
029    import java.util.Properties;
030    import java.util.Set;
031    import java.util.regex.Matcher;
032    import java.util.regex.Pattern;
033    
034    /**
035     * <p>Translates Creole markp to JSPWiki markup. Simple translator uses regular expressions.
036     * See http://www.wikicreole.org for the WikiCreole spec.</p>
037     *
038     * <p>This translator can be configured through properties defined in
039     * jspwiki.properties starting with "creole.*". See the
040     * jspwiki.properties file for an explanation of the properties</p>
041     *
042     * <p><b>WARNING</b>: This is an experimental feature, and known to be
043     * broken.  Use at your own risk.</o>
044     *
045     *
046     * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
047     */
048    public class CreoleToJSPWikiTranslator
049    {
050    
051        // These variables are expanded so that admins
052        // can display information about the current installed
053        // pagefilter
054        //
055        // The syntax is the same as a wiki var. Unlike a wiki
056        // war though, the CreoleTranslator itself
057        //
058        // [{$creolepagefilter.version}]
059        // [{$creolepagefilter.creoleversion}]
060        // [{$creolepagefilter.linebreak}] -> bloglike/wikilike
061    
062        /** The version of the filter. */
063        public static final String VAR_VERSION = "1.0.3";
064    
065        /** The version of Creole that this filter supports. */
066        public static final String VAR_CREOLE_VERSION = "1.0";
067    
068        /** The linebreak style "bloglike". */
069        public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike";
070    
071        /** The linebreak style "c2like". */
072        public static final String VAR_LINEBREAK_C2LIKE = "c2like";
073    
074        private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";
075    
076        private static final String JSPWIKI_BOLD = "__$1__$3";
077    
078        private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";
079    
080        private static final String JSPWIKI_ITALIC = "''$1''$3";
081    
082        private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";
083    
084        private static final String JSPWIKI_SIMPLELINK = "[$1]";
085    
086        private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";
087    
088        private static final String JSPWIKI_LINK = "[$2|$1]";
089    
090        private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";
091    
092        private static final String JSPWIKI_HEADER_0 = "$1!!!$2";
093    
094        private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";
095    
096        private static final String JSPWIKI_HEADER_1 = "$1!!!$2";
097    
098        private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";
099    
100        private static final String JSPWIKI_HEADER_2 = "$1!!$2";
101    
102        private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";
103    
104        private static final String JSPWIKI_HEADER_3 = "$1!$2";
105    
106        private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";
107    
108        private static final String JSPWIKI_HEADER_4 = "$1__$2__";
109    
110        private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";
111    
112        private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";
113    
114        private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";
115    
116        private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";
117    
118        private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";
119    
120        private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";
121    
122        private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";
123    
124        private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";
125    
126        private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";
127    
128        //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])";
129    
130        //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2";
131    
132        private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";
133    
134        private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";
135    
136        private static final String JSPWIKI_PLUGIN = "[{$1}]";
137    
138        private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";
139    
140        private static final String HTTP_URL = "$1http://$2$3";
141    
142        private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";
143    
144        private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";
145    
146        private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]";
147    
148        private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";
149    
150        private static final String JSPWIKI_TABLE = "$1$2$4";
151    
152        /* TODO Is it possible to use just protect :// ? */
153        private static final String URL_PROTECTED = "http://|ftp://|https://";
154    
155        private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";
156    
157        private static final String SIGNATURE = "--~~~";
158    
159        private static final String SIGNATURE_AND_DATE = "--~~~~";
160    
161        private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";
162    
163        private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";
164    
165        private static Map<String, String> c_protectionMap = new HashMap<String, String>();
166    
167        private        ArrayList<String> m_hashList = new ArrayList<String>();
168    
169        /**
170         *  I have no idea what this method does.  Could someone please tell me?
171         *  
172         * @param wikiProps A property set
173         * @param content The content to translate?
174         * @param username The username in the signature?
175         * @return Probably some translated content.
176         */
177        public String translateSignature(Properties wikiProps, final String content, String username)
178        {
179    
180            String dateFormat = wikiProps.getProperty("creole.dateFormat");
181    
182            if (dateFormat == null)
183            {
184                dateFormat = DEFAULT_DATEFORMAT;
185            }
186    
187            SimpleDateFormat df = null;
188            try
189            {
190                df = new SimpleDateFormat(dateFormat);
191            }
192            catch (Exception e)
193            {
194                e.printStackTrace();
195                df = new SimpleDateFormat(DEFAULT_DATEFORMAT);
196            }
197    
198            String result = content;
199            result = protectMarkup(result, PREFORMATTED_PROTECTED, "", "");
200            result = protectMarkup(result, URL_PROTECTED, "", "");
201    
202            Calendar cal = Calendar.getInstance();
203            result = translateElement(result, SIGNATURE_AND_DATE, "-- [[" + username + "]], " + df.format(cal.getTime()));
204            result = translateElement(result, SIGNATURE, "-- [[" + username + "]]");
205            result = unprotectMarkup(result, false);
206            return result;
207        }
208    
209        /** 
210         *  Translates Creole markup to JSPWiki markup 
211         *  
212         *  @param wikiProps A set of Wiki Properties
213         *  @param content Creole markup
214         *  @return Wiki markup
215         */
216        public String translate(Properties wikiProps, final String content)
217        {
218            boolean blogLineBreaks = false;
219            /*
220            // BROKEN, breaks on different platforms.
221            String tmp = wikiProps.getProperty("creole.blogLineBreaks");
222            if (tmp != null)
223            {
224                if (tmp.trim().equals("true"))
225                    blogLineBreaks = true;
226            }
227            */
228            String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name");
229    
230            String result = content;
231            //
232            // Breaks on OSX.  It is never a good idea to tamper with the linebreaks.  JSPWiki always
233            // stores linebreaks as \r\n, regardless of the platform.
234            //result = result.replace("\r\n", "\n");
235            //result = result.replace("\r", "\n");
236    
237            /* Now protect the rest */
238            result = protectMarkup(result);
239            result = translateLists(result, "*", "-", "Nothing");
240            result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
241            result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
242            result = translateElement(result, WWW_URL, HTTP_URL);
243    
244            if (imagePlugin != null && !imagePlugin.equals(""))
245            {
246                result = this.replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin);
247                result = this.replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
248            }
249            result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC);
250            result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK);
251            result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
252            result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK);
253            result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4);
254            result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3);
255            result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2);
256            result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1);
257            result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0);
258            result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
259            result = translateLists(result, "-", "*", "#");
260            result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE);
261            result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
262            result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");
263    
264            /*
265            if (blogLineBreaks)
266            {
267                result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS);
268            }
269            */
270            result = unprotectMarkup(result, true);
271    
272            result = translateVariables(result, blogLineBreaks);
273            //result = result.replace("\n", System.getProperty("line.separator"));
274            return result;
275        }
276    
277        /** Translates lists. */
278        private static String translateLists(String content, String sourceSymbol, String targetSymbol, String sourceSymbol2)
279        {
280            String[] lines = content.split("\n");
281            StringBuffer result = new StringBuffer();
282            int counter = 0;
283            int inList = -1;
284            for (int i = 0; i < lines.length; i++)
285            {
286                String line = lines[i];
287                String actSourceSymbol = "";
288                while ((line.startsWith(sourceSymbol) || line.startsWith(sourceSymbol2))
289                       && (actSourceSymbol.equals("") || line.substring(0, 1).equals(actSourceSymbol)))
290                {
291                    actSourceSymbol = line.substring(0, 1);
292                    line = line.substring(1, line.length());
293                    counter++;
294                }
295                if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter))
296                {
297                    for (int c = 0; c < counter; c++)
298                    {
299                        result.append(actSourceSymbol);
300                    }
301                    inList = -1;
302                }
303                else
304                {
305                    for (int c = 0; c < counter; c++)
306                    {
307                        if (actSourceSymbol.equals(sourceSymbol2))
308                        {
309                            result.append(sourceSymbol2);
310                        }
311                        else
312                        {
313                            result.append(targetSymbol);
314                        }
315                    }
316                    inList = counter;
317                }
318                result.append(line);
319                if (i < lines.length - 1)
320                {
321                    result.append("\n");
322                }
323                counter = 0;
324            }
325            
326            // Fixes testExtensions5
327            if( content.endsWith( "\n" ) && result.charAt( result.length()-1 ) != '\n' ) 
328            {
329                result.append( '\n' );
330            }
331            
332            return result.toString();
333        }
334    
335        private String translateVariables(String result, boolean blogLineBreaks)
336        {
337            result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION);
338            result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION);
339            String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE;
340            result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks);
341            return result;
342        }
343    
344        /**
345         * Undoes the protection. This is done by replacing the md5 hashes by the
346         * original markup.
347         *
348         * @see #protectMarkup(String)
349         */
350        private String unprotectMarkup(String content,boolean replacePlugins)
351        {
352            Object[] it = this.m_hashList.toArray();
353    
354            for (int i = it.length - 1; i >= 0; i--)
355            {
356                String hash = (String) it[i];
357                String protectedMarkup = c_protectionMap.get(hash);
358                content = content.replace(hash, protectedMarkup);
359                if ((protectedMarkup.length() < 3 || (protectedMarkup.length() > 2 &&
360                    !protectedMarkup.substring(0, 3).equals("{{{")))&&replacePlugins)
361                    content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN);
362    
363            }
364            return content;
365        }
366    
367        /**
368         * Protects markup that should not be processed. For now this includes:
369         * <ul>
370         * <li>Preformatted sections, they should be ignored</li>
371         * </li>
372         * <li>Protocol strings like <code>http://</code>, they cause problems
373         * because of the <code>//</code> which is interpreted as italic</li>
374         * </ul>
375         * This protection is a simple method to keep the regular expressions for
376         * the other markup simple. Internally the protection is done by replacing
377         * the protected markup with the the md5 hash of the markup.
378         *
379         * @param content
380         * @return The content with protection
381         */
382        private String protectMarkup(String content)
383        {
384            c_protectionMap.clear();
385            m_hashList = new ArrayList<String>();
386            content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
387            content = protectMarkup(content, URL_PROTECTED, "", "");
388            content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
389            content = protectMarkup(content, CREOLE_PLUGIN, "", "");
390    
391            // content = protectMarkup(content, LINE_PROTECTED);
392            // content = protectMarkup(content, SIGNATURE_PROTECTED);
393            return content;
394        }
395    
396        private ArrayList readPlaceholderProperties(Properties wikiProps)
397        {
398            Set keySet = wikiProps.keySet();
399            Object[] keys = keySet.toArray();
400            ArrayList<String[]> result = new ArrayList<String[]>();
401    
402            for( int i = 0; i < keys.length; i++ )
403            {
404                String key = keys[i] + "";
405                String value = wikiProps.getProperty( keys[i] + "" );
406                if( key.indexOf( "creole.imagePlugin.para.%" ) > -1 )
407                {
408                    String[] pair = new String[2];
409                    pair[0] = key.replaceAll( "creole.imagePlugin.para.%", "" );
410                    pair[1] = value;
411                    result.add( pair );
412                }
413            }
414            return result;
415        }
416    
417        private String replaceImageArea(Properties wikiProps, String content, String markupRegex, String replaceContent, int groupPos,
418                                        String imagePlugin)
419        {
420            Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
421            String contentCopy = content;
422    
423            ArrayList plProperties = readPlaceholderProperties(wikiProps);
424    
425            while (matcher.find())
426            {
427                String protectedMarkup = matcher.group(0);
428                String paramsField = matcher.group(groupPos);
429                String paramsString = "";
430    
431                if (paramsField != null)
432                {
433                    String[] params = paramsField.split(",");
434    
435                    for (int i = 0; i < params.length; i++)
436                    {
437                        String param = params[i].replaceAll("\\||\\s", "").toUpperCase();
438    
439                        // Replace placeholder params
440                        for (int j = 0; j < plProperties.size(); j++)
441                        {
442                            String[] pair = (String[]) plProperties.get(j);
443                            String key = pair[0];
444                            String value = pair[1];
445                            String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>");
446                            code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2");
447                            if (!code.equals(param))
448                                paramsString += code;
449                        }
450    
451                        // Check if it is a number
452                        try
453                        {
454                            Integer.parseInt(param);
455                            paramsString += " width='" + param + "px'";
456                        }
457                        catch (Exception e)
458                        {
459    
460                            if (wikiProps.getProperty("creole.imagePlugin.para." + param) != null)
461                                paramsString += " "
462                                                + wikiProps.getProperty("creole.imagePlugin.para." + param)
463                                                    .replaceAll("^(\"|')(.*)(\"|')$", "$2");
464                        }
465                    }
466                }
467                String temp = protectedMarkup;
468    
469                protectedMarkup = translateElement(protectedMarkup, markupRegex, replaceContent);
470                protectedMarkup = protectedMarkup.replaceAll("\u2015", paramsString);
471                protectedMarkup = protectedMarkup.replaceAll("\u2016", imagePlugin);
472                protectedMarkup = protectedMarkup.replaceAll("caption=''", "");
473                protectedMarkup = protectedMarkup.replaceAll("\\s+", " ");
474    
475                int pos = contentCopy.indexOf(temp);
476                contentCopy = contentCopy.substring(0, pos) + protectedMarkup
477                              + contentCopy.substring(pos + temp.length(), contentCopy.length());
478            }
479            return contentCopy;
480        }
481    
482        private String replaceArea(String content, String markupRegex, String replaceSource, String replaceTarget)
483        {
484            Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
485            String contentCopy = content;
486    
487            while (matcher.find())
488            {
489                String protectedMarkup = matcher.group(0);
490                String temp = protectedMarkup;
491                protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
492                int pos = contentCopy.indexOf(temp);
493                contentCopy = contentCopy.substring(0, pos) + protectedMarkup
494                              + contentCopy.substring(pos + temp.length(), contentCopy.length());
495            }
496            return contentCopy;
497        }
498    
499        /**
500         * Protects a specific markup
501         *
502         * @see #protectMarkup(String)
503         */
504        private String protectMarkup(String content, String markupRegex, String replaceSource, String replaceTarget)
505        {
506            Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
507            StringBuffer result = new StringBuffer();
508            while (matcher.find())
509            {
510                String protectedMarkup = matcher.group();
511                protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
512                try
513                {
514                    MessageDigest digest = MessageDigest.getInstance("MD5");
515                    digest.reset();
516                    digest.update(protectedMarkup.getBytes("UTF-8"));
517                    String hash = bytesToHash(digest.digest());
518                    matcher.appendReplacement(result, hash);
519                    c_protectionMap.put(hash, protectedMarkup);
520                    m_hashList.add(hash);
521                }
522                catch (NoSuchAlgorithmException e)
523                {
524                    // FIXME: Should log properly
525                    e.printStackTrace();
526                }
527                catch (UnsupportedEncodingException e)
528                {
529                    // FIXME: Auto-generated catch block
530                    e.printStackTrace();
531                }
532            }
533            matcher.appendTail(result);
534            return result.toString();
535        }
536    
537        private String bytesToHash(byte[] b)
538        {
539            String hash = "";
540            for (int i = 0; i < b.length; i++)
541            {
542                hash += Integer.toString((b[i] & 0xff) + 0x100, 16).substring(1);
543            }
544            return hash;
545        }
546    
547        private String translateElement(String content, String fromMarkup, String toMarkup)
548        {
549            Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content);
550            StringBuffer result = new StringBuffer();
551    
552            while (matcher.find())
553            {
554                matcher.appendReplacement(result, toMarkup);
555            }
556            matcher.appendTail(result);
557            return result.toString();
558        }
559    }