001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import java.io.UnsupportedEncodingException; 022import java.security.MessageDigest; 023import java.security.NoSuchAlgorithmException; 024import java.text.SimpleDateFormat; 025import java.util.ArrayList; 026import java.util.Calendar; 027import java.util.HashMap; 028import java.util.Map; 029import java.util.Properties; 030import java.util.Set; 031import java.util.regex.Matcher; 032import java.util.regex.Pattern; 033 034/** 035 * <p>Translates Creole markp to JSPWiki markup. Simple translator uses regular expressions. 036 * See http://www.wikicreole.org for the WikiCreole spec.</p> 037 * 038 * <p>This translator can be configured through properties defined in 039 * jspwiki.properties starting with "creole.*". See the 040 * jspwiki.properties file for an explanation of the properties</p> 041 * 042 * <p><b>WARNING</b>: This is an experimental feature, and known to be 043 * broken. Use at your own risk.</o> 044 * 045 * 046 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a> 047 */ 048public class CreoleToJSPWikiTranslator 049{ 050 051 // These variables are expanded so that admins 052 // can display information about the current installed 053 // pagefilter 054 // 055 // The syntax is the same as a wiki var. Unlike a wiki 056 // war though, the CreoleTranslator itself 057 // 058 // [{$creolepagefilter.version}] 059 // [{$creolepagefilter.creoleversion}] 060 // [{$creolepagefilter.linebreak}] -> bloglike/wikilike 061 062 /** The version of the filter. */ 063 public static final String VAR_VERSION = "1.0.3"; 064 065 /** The version of Creole that this filter supports. */ 066 public static final String VAR_CREOLE_VERSION = "1.0"; 067 068 /** The linebreak style "bloglike". */ 069 public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike"; 070 071 /** The linebreak style "c2like". */ 072 public static final String VAR_LINEBREAK_C2LIKE = "c2like"; 073 074 private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))"; 075 076 private static final String JSPWIKI_BOLD = "__$1__$3"; 077 078 private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))"; 079 080 private static final String JSPWIKI_ITALIC = "''$1''$3"; 081 082 private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]"; 083 084 private static final String JSPWIKI_SIMPLELINK = "[$1]"; 085 086 private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]"; 087 088 private static final String JSPWIKI_LINK = "[$2|$1]"; 089 090 private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}"; 091 092 private static final String JSPWIKI_HEADER_0 = "$1!!!$2"; 093 094 private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}"; 095 096 private static final String JSPWIKI_HEADER_1 = "$1!!!$2"; 097 098 private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}"; 099 100 private static final String JSPWIKI_HEADER_2 = "$1!!$2"; 101 102 private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}"; 103 104 private static final String JSPWIKI_HEADER_3 = "$1!$2"; 105 106 private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}"; 107 108 private static final String JSPWIKI_HEADER_4 = "$1__$2__"; 109 110 private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}"; 111 112 private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]"; 113 114 private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}"; 115 116 private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]"; 117 118 private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]"; 119 120 private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]"; 121 122 private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]"; 123 124 private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]"; 125 126 private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E"; 127 128 //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])"; 129 130 //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2"; 131 132 private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)"; 133 134 private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>"; 135 136 private static final String JSPWIKI_PLUGIN = "[{$1}]"; 137 138 private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])"; 139 140 private static final String HTTP_URL = "$1http://$2$3"; 141 142 private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}"; 143 144 private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]"; 145 146 private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]"; 147 148 private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]"; 149 150 private static final String JSPWIKI_TABLE = "$1$2$4"; 151 152 /* TODO Is it possible to use just protect :// ? */ 153 private static final String URL_PROTECTED = "http://|ftp://|https://"; 154 155 private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))"; 156 157 private static final String SIGNATURE = "--~~~"; 158 159 private static final String SIGNATURE_AND_DATE = "--~~~~"; 160 161 private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd"; 162 163 private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)"; 164 165 private static Map<String, String> c_protectionMap = new HashMap<String, String>(); 166 167 private ArrayList<String> m_hashList = new ArrayList<String>(); 168 169 /** 170 * I have no idea what this method does. Could someone please tell me? 171 * 172 * @param wikiProps A property set 173 * @param content The content to translate? 174 * @param username The username in the signature? 175 * @return Probably some translated content. 176 */ 177 public String translateSignature(Properties wikiProps, final String content, String username) 178 { 179 180 String dateFormat = wikiProps.getProperty("creole.dateFormat"); 181 182 if (dateFormat == null) 183 { 184 dateFormat = DEFAULT_DATEFORMAT; 185 } 186 187 SimpleDateFormat df = null; 188 try 189 { 190 df = new SimpleDateFormat(dateFormat); 191 } 192 catch (Exception e) 193 { 194 e.printStackTrace(); 195 df = new SimpleDateFormat(DEFAULT_DATEFORMAT); 196 } 197 198 String result = content; 199 result = protectMarkup(result, PREFORMATTED_PROTECTED, "", ""); 200 result = protectMarkup(result, URL_PROTECTED, "", ""); 201 202 Calendar cal = Calendar.getInstance(); 203 result = translateElement(result, SIGNATURE_AND_DATE, "-- [[" + username + "]], " + df.format(cal.getTime())); 204 result = translateElement(result, SIGNATURE, "-- [[" + username + "]]"); 205 result = unprotectMarkup(result, false); 206 return result; 207 } 208 209 /** 210 * Translates Creole markup to JSPWiki markup 211 * 212 * @param wikiProps A set of Wiki Properties 213 * @param content Creole markup 214 * @return Wiki markup 215 */ 216 public String translate(Properties wikiProps, final String content) 217 { 218 boolean blogLineBreaks = false; 219 /* 220 // BROKEN, breaks on different platforms. 221 String tmp = wikiProps.getProperty("creole.blogLineBreaks"); 222 if (tmp != null) 223 { 224 if (tmp.trim().equals("true")) 225 blogLineBreaks = true; 226 } 227 */ 228 String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name"); 229 230 String result = content; 231 // 232 // Breaks on OSX. It is never a good idea to tamper with the linebreaks. JSPWiki always 233 // stores linebreaks as \r\n, regardless of the platform. 234 //result = result.replace("\r\n", "\n"); 235 //result = result.replace("\r", "\n"); 236 237 /* Now protect the rest */ 238 result = protectMarkup(result); 239 result = translateLists(result, "*", "-", "Nothing"); 240 result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD); 241 result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC); 242 result = translateElement(result, WWW_URL, HTTP_URL); 243 244 if (imagePlugin != null && !imagePlugin.equals("")) 245 { 246 result = this.replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin); 247 result = this.replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin); 248 } 249 result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC); 250 result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK); 251 result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK); 252 result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK); 253 result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4); 254 result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3); 255 result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2); 256 result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1); 257 result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0); 258 result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE); 259 result = translateLists(result, "-", "*", "#"); 260 result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE); 261 result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE); 262 result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2"); 263 264 /* 265 if (blogLineBreaks) 266 { 267 result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS); 268 } 269 */ 270 result = unprotectMarkup(result, true); 271 272 result = translateVariables(result, blogLineBreaks); 273 //result = result.replace("\n", System.getProperty("line.separator")); 274 return result; 275 } 276 277 /** Translates lists. */ 278 private static String translateLists(String content, String sourceSymbol, String targetSymbol, String sourceSymbol2) 279 { 280 String[] lines = content.split("\n"); 281 StringBuilder result = new StringBuilder(); 282 int counter = 0; 283 int inList = -1; 284 for (int i = 0; i < lines.length; i++) 285 { 286 String line = lines[i]; 287 String actSourceSymbol = ""; 288 while ((line.startsWith(sourceSymbol) || line.startsWith(sourceSymbol2)) 289 && (actSourceSymbol.equals("") || line.substring(0, 1).equals(actSourceSymbol))) 290 { 291 actSourceSymbol = line.substring(0, 1); 292 line = line.substring(1, line.length()); 293 counter++; 294 } 295 if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter)) 296 { 297 for (int c = 0; c < counter; c++) 298 { 299 result.append(actSourceSymbol); 300 } 301 inList = -1; 302 } 303 else 304 { 305 for (int c = 0; c < counter; c++) 306 { 307 if (actSourceSymbol.equals(sourceSymbol2)) 308 { 309 result.append(sourceSymbol2); 310 } 311 else 312 { 313 result.append(targetSymbol); 314 } 315 } 316 inList = counter; 317 } 318 result.append(line); 319 if (i < lines.length - 1) 320 { 321 result.append("\n"); 322 } 323 counter = 0; 324 } 325 326 // Fixes testExtensions5 327 if( content.endsWith( "\n" ) && result.charAt( result.length()-1 ) != '\n' ) 328 { 329 result.append( '\n' ); 330 } 331 332 return result.toString(); 333 } 334 335 private String translateVariables(String result, boolean blogLineBreaks) 336 { 337 result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION); 338 result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION); 339 String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE; 340 result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks); 341 return result; 342 } 343 344 /** 345 * Undoes the protection. This is done by replacing the md5 hashes by the 346 * original markup. 347 * 348 * @see #protectMarkup(String) 349 */ 350 private String unprotectMarkup(String content,boolean replacePlugins) 351 { 352 Object[] it = this.m_hashList.toArray(); 353 354 for (int i = it.length - 1; i >= 0; i--) 355 { 356 String hash = (String) it[i]; 357 String protectedMarkup = c_protectionMap.get(hash); 358 content = content.replace(hash, protectedMarkup); 359 if ((protectedMarkup.length() < 3 || (protectedMarkup.length() > 2 && 360 !protectedMarkup.substring(0, 3).equals("{{{")))&&replacePlugins) 361 content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN); 362 363 } 364 return content; 365 } 366 367 /** 368 * Protects markup that should not be processed. For now this includes: 369 * <ul> 370 * <li>Preformatted sections, they should be ignored</li> 371 * </li> 372 * <li>Protocol strings like <code>http://</code>, they cause problems 373 * because of the <code>//</code> which is interpreted as italic</li> 374 * </ul> 375 * This protection is a simple method to keep the regular expressions for 376 * the other markup simple. Internally the protection is done by replacing 377 * the protected markup with the the md5 hash of the markup. 378 * 379 * @param content 380 * @return The content with protection 381 */ 382 private String protectMarkup(String content) 383 { 384 c_protectionMap.clear(); 385 m_hashList = new ArrayList<String>(); 386 content = protectMarkup(content, PREFORMATTED_PROTECTED, "", ""); 387 content = protectMarkup(content, URL_PROTECTED, "", ""); 388 content = protectMarkup(content, ESCAPE_PROTECTED, "", ""); 389 content = protectMarkup(content, CREOLE_PLUGIN, "", ""); 390 391 // content = protectMarkup(content, LINE_PROTECTED); 392 // content = protectMarkup(content, SIGNATURE_PROTECTED); 393 return content; 394 } 395 396 private ArrayList< String[] > readPlaceholderProperties(Properties wikiProps) 397 { 398 Set< Object > keySet = wikiProps.keySet(); 399 Object[] keys = keySet.toArray(); 400 ArrayList<String[]> result = new ArrayList<String[]>(); 401 402 for( int i = 0; i < keys.length; i++ ) 403 { 404 String key = keys[i] + ""; 405 String value = wikiProps.getProperty( keys[i] + "" ); 406 if( key.indexOf( "creole.imagePlugin.para.%" ) > -1 ) 407 { 408 String[] pair = new String[2]; 409 pair[0] = key.replaceAll( "creole.imagePlugin.para.%", "" ); 410 pair[1] = value; 411 result.add( pair ); 412 } 413 } 414 return result; 415 } 416 417 private String replaceImageArea(Properties wikiProps, String content, String markupRegex, String replaceContent, int groupPos, 418 String imagePlugin) 419 { 420 Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 421 String contentCopy = content; 422 423 ArrayList< String[] > plProperties = readPlaceholderProperties(wikiProps); 424 425 while (matcher.find()) 426 { 427 String protectedMarkup = matcher.group(0); 428 String paramsField = matcher.group(groupPos); 429 String paramsString = ""; 430 431 if (paramsField != null) 432 { 433 String[] params = paramsField.split(","); 434 435 for (int i = 0; i < params.length; i++) 436 { 437 String param = params[i].replaceAll("\\||\\s", "").toUpperCase(); 438 439 // Replace placeholder params 440 for (int j = 0; j < plProperties.size(); j++) 441 { 442 String[] pair = plProperties.get(j); 443 String key = pair[0]; 444 String value = pair[1]; 445 String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>"); 446 code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2"); 447 if (!code.equals(param)) { 448 paramsString += code; 449 } 450 } 451 452 // Check if it is a number 453 try 454 { 455 Integer.parseInt(param); 456 paramsString += " width='" + param + "px'"; 457 } 458 catch (Exception e) 459 { 460 461 if (wikiProps.getProperty("creole.imagePlugin.para." + param) != null) 462 paramsString += " " 463 + wikiProps.getProperty("creole.imagePlugin.para." + param) 464 .replaceAll("^(\"|')(.*)(\"|')$", "$2"); 465 } 466 } 467 } 468 String temp = protectedMarkup; 469 470 protectedMarkup = translateElement(protectedMarkup, markupRegex, replaceContent); 471 protectedMarkup = protectedMarkup.replaceAll("\u2015", paramsString); 472 protectedMarkup = protectedMarkup.replaceAll("\u2016", imagePlugin); 473 protectedMarkup = protectedMarkup.replaceAll("caption=''", ""); 474 protectedMarkup = protectedMarkup.replaceAll("\\s+", " "); 475 476 int pos = contentCopy.indexOf(temp); 477 contentCopy = contentCopy.substring(0, pos) + protectedMarkup 478 + contentCopy.substring(pos + temp.length(), contentCopy.length()); 479 } 480 return contentCopy; 481 } 482 483 private String replaceArea(String content, String markupRegex, String replaceSource, String replaceTarget) 484 { 485 Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 486 String contentCopy = content; 487 488 while (matcher.find()) 489 { 490 String protectedMarkup = matcher.group(0); 491 String temp = protectedMarkup; 492 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget); 493 int pos = contentCopy.indexOf(temp); 494 contentCopy = contentCopy.substring(0, pos) + protectedMarkup 495 + contentCopy.substring(pos + temp.length(), contentCopy.length()); 496 } 497 return contentCopy; 498 } 499 500 /** 501 * Protects a specific markup 502 * 503 * @see #protectMarkup(String) 504 */ 505 private String protectMarkup(String content, String markupRegex, String replaceSource, String replaceTarget) 506 { 507 Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 508 StringBuffer result = new StringBuffer(); 509 while (matcher.find()) 510 { 511 String protectedMarkup = matcher.group(); 512 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget); 513 try 514 { 515 MessageDigest digest = MessageDigest.getInstance("MD5"); 516 digest.reset(); 517 digest.update(protectedMarkup.getBytes("UTF-8")); 518 String hash = bytesToHash(digest.digest()); 519 matcher.appendReplacement(result, hash); 520 c_protectionMap.put(hash, protectedMarkup); 521 m_hashList.add(hash); 522 } 523 catch (NoSuchAlgorithmException e) 524 { 525 // FIXME: Should log properly 526 e.printStackTrace(); 527 } 528 catch (UnsupportedEncodingException e) 529 { 530 // FIXME: Auto-generated catch block 531 e.printStackTrace(); 532 } 533 } 534 matcher.appendTail(result); 535 return result.toString(); 536 } 537 538 private String bytesToHash(byte[] b) 539 { 540 String hash = ""; 541 for (int i = 0; i < b.length; i++) 542 { 543 hash += Integer.toString((b[i] & 0xff) + 0x100, 16).substring(1); 544 } 545 return hash; 546 } 547 548 private String translateElement(String content, String fromMarkup, String toMarkup) 549 { 550 Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content); 551 StringBuffer result = new StringBuffer(); 552 553 while (matcher.find()) 554 { 555 matcher.appendReplacement(result, toMarkup); 556 } 557 matcher.appendTail(result); 558 return result.toString(); 559 } 560}