001 /*
002 Licensed to the Apache Software Foundation (ASF) under one
003 or more contributor license agreements. See the NOTICE file
004 distributed with this work for additional information
005 regarding copyright ownership. The ASF licenses this file
006 to you under the Apache License, Version 2.0 (the
007 "License"); you may not use this file except in compliance
008 with the License. You may obtain a copy of the License at
009
010 http://www.apache.org/licenses/LICENSE-2.0
011
012 Unless required by applicable law or agreed to in writing,
013 software distributed under the License is distributed on an
014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 KIND, either express or implied. See the License for the
016 specific language governing permissions and limitations
017 under the License.
018 */
019 package org.apache.wiki.parser;
020
021 import java.io.UnsupportedEncodingException;
022 import java.security.MessageDigest;
023 import java.security.NoSuchAlgorithmException;
024 import java.text.SimpleDateFormat;
025 import java.util.ArrayList;
026 import java.util.Calendar;
027 import java.util.HashMap;
028 import java.util.Map;
029 import java.util.Properties;
030 import java.util.Set;
031 import java.util.regex.Matcher;
032 import java.util.regex.Pattern;
033
034 /**
035 * <p>Translates Creole markp to JSPWiki markup. Simple translator uses regular expressions.
036 * See http://www.wikicreole.org for the WikiCreole spec.</p>
037 *
038 * <p>This translator can be configured through properties defined in
039 * jspwiki.properties starting with "creole.*". See the
040 * jspwiki.properties file for an explanation of the properties</p>
041 *
042 * <p><b>WARNING</b>: This is an experimental feature, and known to be
043 * broken. Use at your own risk.</o>
044 *
045 *
046 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
047 */
048 public class CreoleToJSPWikiTranslator
049 {
050
051 // These variables are expanded so that admins
052 // can display information about the current installed
053 // pagefilter
054 //
055 // The syntax is the same as a wiki var. Unlike a wiki
056 // war though, the CreoleTranslator itself
057 //
058 // [{$creolepagefilter.version}]
059 // [{$creolepagefilter.creoleversion}]
060 // [{$creolepagefilter.linebreak}] -> bloglike/wikilike
061
062 /** The version of the filter. */
063 public static final String VAR_VERSION = "1.0.3";
064
065 /** The version of Creole that this filter supports. */
066 public static final String VAR_CREOLE_VERSION = "1.0";
067
068 /** The linebreak style "bloglike". */
069 public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike";
070
071 /** The linebreak style "c2like". */
072 public static final String VAR_LINEBREAK_C2LIKE = "c2like";
073
074 private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";
075
076 private static final String JSPWIKI_BOLD = "__$1__$3";
077
078 private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";
079
080 private static final String JSPWIKI_ITALIC = "''$1''$3";
081
082 private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";
083
084 private static final String JSPWIKI_SIMPLELINK = "[$1]";
085
086 private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";
087
088 private static final String JSPWIKI_LINK = "[$2|$1]";
089
090 private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";
091
092 private static final String JSPWIKI_HEADER_0 = "$1!!!$2";
093
094 private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";
095
096 private static final String JSPWIKI_HEADER_1 = "$1!!!$2";
097
098 private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";
099
100 private static final String JSPWIKI_HEADER_2 = "$1!!$2";
101
102 private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";
103
104 private static final String JSPWIKI_HEADER_3 = "$1!$2";
105
106 private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";
107
108 private static final String JSPWIKI_HEADER_4 = "$1__$2__";
109
110 private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";
111
112 private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";
113
114 private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";
115
116 private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";
117
118 private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";
119
120 private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";
121
122 private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";
123
124 private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";
125
126 private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";
127
128 //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])";
129
130 //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2";
131
132 private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";
133
134 private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";
135
136 private static final String JSPWIKI_PLUGIN = "[{$1}]";
137
138 private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";
139
140 private static final String HTTP_URL = "$1http://$2$3";
141
142 private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";
143
144 private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";
145
146 private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]";
147
148 private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";
149
150 private static final String JSPWIKI_TABLE = "$1$2$4";
151
152 /* TODO Is it possible to use just protect :// ? */
153 private static final String URL_PROTECTED = "http://|ftp://|https://";
154
155 private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";
156
157 private static final String SIGNATURE = "--~~~";
158
159 private static final String SIGNATURE_AND_DATE = "--~~~~";
160
161 private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";
162
163 private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";
164
165 private static Map<String, String> c_protectionMap = new HashMap<String, String>();
166
167 private ArrayList<String> m_hashList = new ArrayList<String>();
168
169 /**
170 * I have no idea what this method does. Could someone please tell me?
171 *
172 * @param wikiProps A property set
173 * @param content The content to translate?
174 * @param username The username in the signature?
175 * @return Probably some translated content.
176 */
177 public String translateSignature(Properties wikiProps, final String content, String username)
178 {
179
180 String dateFormat = wikiProps.getProperty("creole.dateFormat");
181
182 if (dateFormat == null)
183 {
184 dateFormat = DEFAULT_DATEFORMAT;
185 }
186
187 SimpleDateFormat df = null;
188 try
189 {
190 df = new SimpleDateFormat(dateFormat);
191 }
192 catch (Exception e)
193 {
194 e.printStackTrace();
195 df = new SimpleDateFormat(DEFAULT_DATEFORMAT);
196 }
197
198 String result = content;
199 result = protectMarkup(result, PREFORMATTED_PROTECTED, "", "");
200 result = protectMarkup(result, URL_PROTECTED, "", "");
201
202 Calendar cal = Calendar.getInstance();
203 result = translateElement(result, SIGNATURE_AND_DATE, "-- [[" + username + "]], " + df.format(cal.getTime()));
204 result = translateElement(result, SIGNATURE, "-- [[" + username + "]]");
205 result = unprotectMarkup(result, false);
206 return result;
207 }
208
209 /**
210 * Translates Creole markup to JSPWiki markup
211 *
212 * @param wikiProps A set of Wiki Properties
213 * @param content Creole markup
214 * @return Wiki markup
215 */
216 public String translate(Properties wikiProps, final String content)
217 {
218 boolean blogLineBreaks = false;
219 /*
220 // BROKEN, breaks on different platforms.
221 String tmp = wikiProps.getProperty("creole.blogLineBreaks");
222 if (tmp != null)
223 {
224 if (tmp.trim().equals("true"))
225 blogLineBreaks = true;
226 }
227 */
228 String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name");
229
230 String result = content;
231 //
232 // Breaks on OSX. It is never a good idea to tamper with the linebreaks. JSPWiki always
233 // stores linebreaks as \r\n, regardless of the platform.
234 //result = result.replace("\r\n", "\n");
235 //result = result.replace("\r", "\n");
236
237 /* Now protect the rest */
238 result = protectMarkup(result);
239 result = translateLists(result, "*", "-", "Nothing");
240 result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
241 result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
242 result = translateElement(result, WWW_URL, HTTP_URL);
243
244 if (imagePlugin != null && !imagePlugin.equals(""))
245 {
246 result = this.replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin);
247 result = this.replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
248 }
249 result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC);
250 result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK);
251 result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
252 result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK);
253 result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4);
254 result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3);
255 result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2);
256 result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1);
257 result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0);
258 result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
259 result = translateLists(result, "-", "*", "#");
260 result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE);
261 result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
262 result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");
263
264 /*
265 if (blogLineBreaks)
266 {
267 result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS);
268 }
269 */
270 result = unprotectMarkup(result, true);
271
272 result = translateVariables(result, blogLineBreaks);
273 //result = result.replace("\n", System.getProperty("line.separator"));
274 return result;
275 }
276
277 /** Translates lists. */
278 private static String translateLists(String content, String sourceSymbol, String targetSymbol, String sourceSymbol2)
279 {
280 String[] lines = content.split("\n");
281 StringBuffer result = new StringBuffer();
282 int counter = 0;
283 int inList = -1;
284 for (int i = 0; i < lines.length; i++)
285 {
286 String line = lines[i];
287 String actSourceSymbol = "";
288 while ((line.startsWith(sourceSymbol) || line.startsWith(sourceSymbol2))
289 && (actSourceSymbol.equals("") || line.substring(0, 1).equals(actSourceSymbol)))
290 {
291 actSourceSymbol = line.substring(0, 1);
292 line = line.substring(1, line.length());
293 counter++;
294 }
295 if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter))
296 {
297 for (int c = 0; c < counter; c++)
298 {
299 result.append(actSourceSymbol);
300 }
301 inList = -1;
302 }
303 else
304 {
305 for (int c = 0; c < counter; c++)
306 {
307 if (actSourceSymbol.equals(sourceSymbol2))
308 {
309 result.append(sourceSymbol2);
310 }
311 else
312 {
313 result.append(targetSymbol);
314 }
315 }
316 inList = counter;
317 }
318 result.append(line);
319 if (i < lines.length - 1)
320 {
321 result.append("\n");
322 }
323 counter = 0;
324 }
325
326 // Fixes testExtensions5
327 if( content.endsWith( "\n" ) && result.charAt( result.length()-1 ) != '\n' )
328 {
329 result.append( '\n' );
330 }
331
332 return result.toString();
333 }
334
335 private String translateVariables(String result, boolean blogLineBreaks)
336 {
337 result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION);
338 result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION);
339 String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE;
340 result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks);
341 return result;
342 }
343
344 /**
345 * Undoes the protection. This is done by replacing the md5 hashes by the
346 * original markup.
347 *
348 * @see #protectMarkup(String)
349 */
350 private String unprotectMarkup(String content,boolean replacePlugins)
351 {
352 Object[] it = this.m_hashList.toArray();
353
354 for (int i = it.length - 1; i >= 0; i--)
355 {
356 String hash = (String) it[i];
357 String protectedMarkup = c_protectionMap.get(hash);
358 content = content.replace(hash, protectedMarkup);
359 if ((protectedMarkup.length() < 3 || (protectedMarkup.length() > 2 &&
360 !protectedMarkup.substring(0, 3).equals("{{{")))&&replacePlugins)
361 content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN);
362
363 }
364 return content;
365 }
366
367 /**
368 * Protects markup that should not be processed. For now this includes:
369 * <ul>
370 * <li>Preformatted sections, they should be ignored</li>
371 * </li>
372 * <li>Protocol strings like <code>http://</code>, they cause problems
373 * because of the <code>//</code> which is interpreted as italic</li>
374 * </ul>
375 * This protection is a simple method to keep the regular expressions for
376 * the other markup simple. Internally the protection is done by replacing
377 * the protected markup with the the md5 hash of the markup.
378 *
379 * @param content
380 * @return The content with protection
381 */
382 private String protectMarkup(String content)
383 {
384 c_protectionMap.clear();
385 m_hashList = new ArrayList<String>();
386 content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
387 content = protectMarkup(content, URL_PROTECTED, "", "");
388 content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
389 content = protectMarkup(content, CREOLE_PLUGIN, "", "");
390
391 // content = protectMarkup(content, LINE_PROTECTED);
392 // content = protectMarkup(content, SIGNATURE_PROTECTED);
393 return content;
394 }
395
396 private ArrayList readPlaceholderProperties(Properties wikiProps)
397 {
398 Set keySet = wikiProps.keySet();
399 Object[] keys = keySet.toArray();
400 ArrayList<String[]> result = new ArrayList<String[]>();
401
402 for( int i = 0; i < keys.length; i++ )
403 {
404 String key = keys[i] + "";
405 String value = wikiProps.getProperty( keys[i] + "" );
406 if( key.indexOf( "creole.imagePlugin.para.%" ) > -1 )
407 {
408 String[] pair = new String[2];
409 pair[0] = key.replaceAll( "creole.imagePlugin.para.%", "" );
410 pair[1] = value;
411 result.add( pair );
412 }
413 }
414 return result;
415 }
416
417 private String replaceImageArea(Properties wikiProps, String content, String markupRegex, String replaceContent, int groupPos,
418 String imagePlugin)
419 {
420 Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
421 String contentCopy = content;
422
423 ArrayList plProperties = readPlaceholderProperties(wikiProps);
424
425 while (matcher.find())
426 {
427 String protectedMarkup = matcher.group(0);
428 String paramsField = matcher.group(groupPos);
429 String paramsString = "";
430
431 if (paramsField != null)
432 {
433 String[] params = paramsField.split(",");
434
435 for (int i = 0; i < params.length; i++)
436 {
437 String param = params[i].replaceAll("\\||\\s", "").toUpperCase();
438
439 // Replace placeholder params
440 for (int j = 0; j < plProperties.size(); j++)
441 {
442 String[] pair = (String[]) plProperties.get(j);
443 String key = pair[0];
444 String value = pair[1];
445 String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>");
446 code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2");
447 if (!code.equals(param))
448 paramsString += code;
449 }
450
451 // Check if it is a number
452 try
453 {
454 Integer.parseInt(param);
455 paramsString += " width='" + param + "px'";
456 }
457 catch (Exception e)
458 {
459
460 if (wikiProps.getProperty("creole.imagePlugin.para." + param) != null)
461 paramsString += " "
462 + wikiProps.getProperty("creole.imagePlugin.para." + param)
463 .replaceAll("^(\"|')(.*)(\"|')$", "$2");
464 }
465 }
466 }
467 String temp = protectedMarkup;
468
469 protectedMarkup = translateElement(protectedMarkup, markupRegex, replaceContent);
470 protectedMarkup = protectedMarkup.replaceAll("\u2015", paramsString);
471 protectedMarkup = protectedMarkup.replaceAll("\u2016", imagePlugin);
472 protectedMarkup = protectedMarkup.replaceAll("caption=''", "");
473 protectedMarkup = protectedMarkup.replaceAll("\\s+", " ");
474
475 int pos = contentCopy.indexOf(temp);
476 contentCopy = contentCopy.substring(0, pos) + protectedMarkup
477 + contentCopy.substring(pos + temp.length(), contentCopy.length());
478 }
479 return contentCopy;
480 }
481
482 private String replaceArea(String content, String markupRegex, String replaceSource, String replaceTarget)
483 {
484 Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
485 String contentCopy = content;
486
487 while (matcher.find())
488 {
489 String protectedMarkup = matcher.group(0);
490 String temp = protectedMarkup;
491 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
492 int pos = contentCopy.indexOf(temp);
493 contentCopy = contentCopy.substring(0, pos) + protectedMarkup
494 + contentCopy.substring(pos + temp.length(), contentCopy.length());
495 }
496 return contentCopy;
497 }
498
499 /**
500 * Protects a specific markup
501 *
502 * @see #protectMarkup(String)
503 */
504 private String protectMarkup(String content, String markupRegex, String replaceSource, String replaceTarget)
505 {
506 Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
507 StringBuffer result = new StringBuffer();
508 while (matcher.find())
509 {
510 String protectedMarkup = matcher.group();
511 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget);
512 try
513 {
514 MessageDigest digest = MessageDigest.getInstance("MD5");
515 digest.reset();
516 digest.update(protectedMarkup.getBytes("UTF-8"));
517 String hash = bytesToHash(digest.digest());
518 matcher.appendReplacement(result, hash);
519 c_protectionMap.put(hash, protectedMarkup);
520 m_hashList.add(hash);
521 }
522 catch (NoSuchAlgorithmException e)
523 {
524 // FIXME: Should log properly
525 e.printStackTrace();
526 }
527 catch (UnsupportedEncodingException e)
528 {
529 // FIXME: Auto-generated catch block
530 e.printStackTrace();
531 }
532 }
533 matcher.appendTail(result);
534 return result.toString();
535 }
536
537 private String bytesToHash(byte[] b)
538 {
539 String hash = "";
540 for (int i = 0; i < b.length; i++)
541 {
542 hash += Integer.toString((b[i] & 0xff) + 0x100, 16).substring(1);
543 }
544 return hash;
545 }
546
547 private String translateElement(String content, String fromMarkup, String toMarkup)
548 {
549 Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content);
550 StringBuffer result = new StringBuffer();
551
552 while (matcher.find())
553 {
554 matcher.appendReplacement(result, toMarkup);
555 }
556 matcher.appendTail(result);
557 return result.toString();
558 }
559 }