001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import java.io.UnsupportedEncodingException; 022import java.nio.charset.StandardCharsets; 023import java.security.MessageDigest; 024import java.security.NoSuchAlgorithmException; 025import java.text.SimpleDateFormat; 026import java.util.ArrayList; 027import java.util.Calendar; 028import java.util.HashMap; 029import java.util.Map; 030import java.util.Properties; 031import java.util.Set; 032import java.util.regex.Matcher; 033import java.util.regex.Pattern; 034 035/** 036 * <p>Translates Creole markp to JSPWiki markup. Simple translator uses regular expressions. 037 * See http://www.wikicreole.org for the WikiCreole spec.</p> 038 * 039 * <p>This translator can be configured through properties defined in 040 * jspwiki.properties starting with "creole.*". See the 041 * jspwiki.properties file for an explanation of the properties</p> 042 * 043 * <p><b>WARNING</b>: This is an experimental feature, and known to be 044 * broken. Use at your own risk.</o> 045 * 046 * 047 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a> 048 */ 049public class CreoleToJSPWikiTranslator 050{ 051 052 // These variables are expanded so that admins 053 // can display information about the current installed 054 // pagefilter 055 // 056 // The syntax is the same as a wiki var. Unlike a wiki 057 // war though, the CreoleTranslator itself 058 // 059 // [{$creolepagefilter.version}] 060 // [{$creolepagefilter.creoleversion}] 061 // [{$creolepagefilter.linebreak}] -> bloglike/wikilike 062 063 /** The version of the filter. */ 064 public static final String VAR_VERSION = "1.0.3"; 065 066 /** The version of Creole that this filter supports. */ 067 public static final String VAR_CREOLE_VERSION = "1.0"; 068 069 /** The linebreak style "bloglike". */ 070 public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike"; 071 072 /** The linebreak style "c2like". */ 073 public static final String VAR_LINEBREAK_C2LIKE = "c2like"; 074 075 private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))"; 076 077 private static final String JSPWIKI_BOLD = "__$1__$3"; 078 079 private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))"; 080 081 private static final String JSPWIKI_ITALIC = "''$1''$3"; 082 083 private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]"; 084 085 private static final String JSPWIKI_SIMPLELINK = "[$1]"; 086 087 private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]"; 088 089 private static final String JSPWIKI_LINK = "[$2|$1]"; 090 091 private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}"; 092 093 private static final String JSPWIKI_HEADER_0 = "$1!!!$2"; 094 095 private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}"; 096 097 private static final String JSPWIKI_HEADER_1 = "$1!!!$2"; 098 099 private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}"; 100 101 private static final String JSPWIKI_HEADER_2 = "$1!!$2"; 102 103 private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}"; 104 105 private static final String JSPWIKI_HEADER_3 = "$1!$2"; 106 107 private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}"; 108 109 private static final String JSPWIKI_HEADER_4 = "$1__$2__"; 110 111 private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}"; 112 113 private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]"; 114 115 private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}"; 116 117 private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]"; 118 119 private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]"; 120 121 private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]"; 122 123 private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]"; 124 125 private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]"; 126 127 private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E"; 128 129 //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])"; 130 131 //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2"; 132 133 private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)"; 134 135 private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>"; 136 137 private static final String JSPWIKI_PLUGIN = "[{$1}]"; 138 139 private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])"; 140 141 private static final String HTTP_URL = "$1http://$2$3"; 142 143 private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}"; 144 145 private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]"; 146 147 private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]"; 148 149 private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]"; 150 151 private static final String JSPWIKI_TABLE = "$1$2$4"; 152 153 /* TODO Is it possible to use just protect :// ? */ 154 private static final String URL_PROTECTED = "http://|ftp://|https://"; 155 156 private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))"; 157 158 private static final String SIGNATURE = "--~~~"; 159 160 private static final String SIGNATURE_AND_DATE = "--~~~~"; 161 162 private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd"; 163 164 private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)"; 165 166 private static final Map<String, String> c_protectionMap = new HashMap<String, String>(); 167 168 private ArrayList<String> m_hashList = new ArrayList<String>(); 169 170 /** 171 * I have no idea what this method does. Could someone please tell me? 172 * 173 * @param wikiProps A property set 174 * @param content The content to translate? 175 * @param username The username in the signature? 176 * @return Probably some translated content. 177 */ 178 public String translateSignature(final Properties wikiProps, final String content, final String username) 179 { 180 181 String dateFormat = wikiProps.getProperty("creole.dateFormat"); 182 183 if (dateFormat == null) 184 { 185 dateFormat = DEFAULT_DATEFORMAT; 186 } 187 188 SimpleDateFormat df = null; 189 try 190 { 191 df = new SimpleDateFormat(dateFormat); 192 } 193 catch (final Exception e) 194 { 195 e.printStackTrace(); 196 df = new SimpleDateFormat(DEFAULT_DATEFORMAT); 197 } 198 199 String result = content; 200 result = protectMarkup(result, PREFORMATTED_PROTECTED, "", ""); 201 result = protectMarkup(result, URL_PROTECTED, "", ""); 202 203 final Calendar cal = Calendar.getInstance(); 204 result = translateElement(result, SIGNATURE_AND_DATE, "-- [[" + username + "]], " + df.format(cal.getTime())); 205 result = translateElement(result, SIGNATURE, "-- [[" + username + "]]"); 206 result = unprotectMarkup(result, false); 207 return result; 208 } 209 210 /** 211 * Translates Creole markup to JSPWiki markup 212 * 213 * @param wikiProps A set of Wiki Properties 214 * @param content Creole markup 215 * @return Wiki markup 216 */ 217 public String translate(final Properties wikiProps, final String content) 218 { 219 final boolean blogLineBreaks = false; 220 /* 221 // BROKEN, breaks on different platforms. 222 String tmp = wikiProps.getProperty("creole.blogLineBreaks"); 223 if (tmp != null) 224 { 225 if (tmp.trim().equals("true")) 226 blogLineBreaks = true; 227 } 228 */ 229 final String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name"); 230 231 String result = content; 232 // 233 // Breaks on OSX. It is never a good idea to tamper with the linebreaks. JSPWiki always 234 // stores linebreaks as \r\n, regardless of the platform. 235 //result = result.replace("\r\n", "\n"); 236 //result = result.replace("\r", "\n"); 237 238 /* Now protect the rest */ 239 result = protectMarkup(result); 240 result = translateLists(result, "*", "-", "Nothing"); 241 result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD); 242 result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC); 243 result = translateElement(result, WWW_URL, HTTP_URL); 244 245 if (imagePlugin != null && !imagePlugin.equals("")) 246 { 247 result = this.replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin); 248 result = this.replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin); 249 } 250 result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC); 251 result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK); 252 result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK); 253 result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK); 254 result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4); 255 result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3); 256 result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2); 257 result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1); 258 result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0); 259 result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE); 260 result = translateLists(result, "-", "*", "#"); 261 result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE); 262 result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE); 263 result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2"); 264 265 /* 266 if (blogLineBreaks) 267 { 268 result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS); 269 } 270 */ 271 result = unprotectMarkup(result, true); 272 273 result = translateVariables(result, blogLineBreaks); 274 //result = result.replace("\n", System.getProperty("line.separator")); 275 return result; 276 } 277 278 /** Translates lists. */ 279 private static String translateLists(final String content, final String sourceSymbol, final String targetSymbol, final String sourceSymbol2) 280 { 281 final String[] lines = content.split("\n"); 282 final StringBuilder result = new StringBuilder(); 283 int counter = 0; 284 int inList = -1; 285 for (int i = 0; i < lines.length; i++) 286 { 287 String line = lines[i]; 288 String actSourceSymbol = ""; 289 while ((line.startsWith(sourceSymbol) || line.startsWith(sourceSymbol2)) 290 && (actSourceSymbol.equals("") || line.substring(0, 1).equals(actSourceSymbol))) 291 { 292 actSourceSymbol = line.substring(0, 1); 293 line = line.substring( 1 ); 294 counter++; 295 } 296 if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter)) 297 { 298 for (int c = 0; c < counter; c++) 299 { 300 result.append(actSourceSymbol); 301 } 302 inList = -1; 303 } 304 else 305 { 306 for (int c = 0; c < counter; c++) 307 { 308 if (actSourceSymbol.equals(sourceSymbol2)) 309 { 310 result.append(sourceSymbol2); 311 } 312 else 313 { 314 result.append(targetSymbol); 315 } 316 } 317 inList = counter; 318 } 319 result.append(line); 320 if (i < lines.length - 1) 321 { 322 result.append("\n"); 323 } 324 counter = 0; 325 } 326 327 // Fixes testExtensions5 328 if( content.endsWith( "\n" ) && result.charAt( result.length()-1 ) != '\n' ) 329 { 330 result.append( '\n' ); 331 } 332 333 return result.toString(); 334 } 335 336 private String translateVariables(String result, final boolean blogLineBreaks) 337 { 338 result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION); 339 result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION); 340 final String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE; 341 result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks); 342 return result; 343 } 344 345 /** 346 * Undoes the protection. This is done by replacing the md5 hashes by the 347 * original markup. 348 * 349 * @see #protectMarkup(String) 350 */ 351 private String unprotectMarkup(String content, final boolean replacePlugins) 352 { 353 final Object[] it = this.m_hashList.toArray(); 354 355 for (int i = it.length - 1; i >= 0; i--) 356 { 357 final String hash = (String) it[i]; 358 final String protectedMarkup = c_protectionMap.get(hash); 359 content = content.replace(hash, protectedMarkup); 360 if ((protectedMarkup.length() < 3 || (protectedMarkup.length() > 2 && 361 !protectedMarkup.startsWith("{{{")))&&replacePlugins) 362 content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN); 363 364 } 365 return content; 366 } 367 368 /** 369 * Protects markup that should not be processed. For now this includes: 370 * <ul> 371 * <li>Preformatted sections, they should be ignored</li> 372 * </li> 373 * <li>Protocol strings like <code>http://</code>, they cause problems 374 * because of the <code>//</code> which is interpreted as italic</li> 375 * </ul> 376 * This protection is a simple method to keep the regular expressions for 377 * the other markup simple. Internally the protection is done by replacing 378 * the protected markup with the the md5 hash of the markup. 379 * 380 * @param content 381 * @return The content with protection 382 */ 383 private String protectMarkup(String content) 384 { 385 c_protectionMap.clear(); 386 m_hashList = new ArrayList<String>(); 387 content = protectMarkup(content, PREFORMATTED_PROTECTED, "", ""); 388 content = protectMarkup(content, URL_PROTECTED, "", ""); 389 content = protectMarkup(content, ESCAPE_PROTECTED, "", ""); 390 content = protectMarkup(content, CREOLE_PLUGIN, "", ""); 391 392 // content = protectMarkup(content, LINE_PROTECTED); 393 // content = protectMarkup(content, SIGNATURE_PROTECTED); 394 return content; 395 } 396 397 private ArrayList< String[] > readPlaceholderProperties(final Properties wikiProps) 398 { 399 final Set< Object > keySet = wikiProps.keySet(); 400 final Object[] keys = keySet.toArray(); 401 final ArrayList<String[]> result = new ArrayList<String[]>(); 402 403 for( int i = 0; i < keys.length; i++ ) 404 { 405 final String key = keys[i] + ""; 406 final String value = wikiProps.getProperty( keys[i] + "" ); 407 if( key.indexOf( "creole.imagePlugin.para.%" ) > -1 ) 408 { 409 final String[] pair = new String[2]; 410 pair[0] = key.replaceAll( "creole.imagePlugin.para.%", "" ); 411 pair[1] = value; 412 result.add( pair ); 413 } 414 } 415 return result; 416 } 417 418 private String replaceImageArea(final Properties wikiProps, final String content, final String markupRegex, final String replaceContent, final int groupPos, 419 final String imagePlugin) 420 { 421 final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 422 String contentCopy = content; 423 424 final ArrayList< String[] > plProperties = readPlaceholderProperties(wikiProps); 425 426 while (matcher.find()) 427 { 428 String protectedMarkup = matcher.group(0); 429 final String paramsField = matcher.group(groupPos); 430 final StringBuilder paramsString = new StringBuilder(); 431 432 if (paramsField != null) 433 { 434 final String[] params = paramsField.split(","); 435 436 for (int i = 0; i < params.length; i++) 437 { 438 final String param = params[i].replaceAll("\\||\\s", "").toUpperCase(); 439 440 // Replace placeholder params 441 for (int j = 0; j < plProperties.size(); j++) 442 { 443 final String[] pair = plProperties.get(j); 444 final String key = pair[0]; 445 final String value = pair[1]; 446 String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>"); 447 code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2"); 448 if (!code.equals(param)) { 449 paramsString.append(code); 450 } 451 } 452 453 // Check if it is a number 454 try 455 { 456 Integer.parseInt(param); 457 paramsString.append(" width='").append(param).append("px'"); 458 } 459 catch (final Exception e) 460 { 461 462 if (wikiProps.getProperty("creole.imagePlugin.para." + param) != null) 463 paramsString.append(" ").append(wikiProps.getProperty("creole.imagePlugin.para." + param) 464 .replaceAll("^(\"|')(.*)(\"|')$", "$2")); 465 } 466 } 467 } 468 final String temp = protectedMarkup; 469 470 protectedMarkup = translateElement(protectedMarkup, markupRegex, replaceContent); 471 protectedMarkup = protectedMarkup.replaceAll("\u2015", paramsString.toString()); 472 protectedMarkup = protectedMarkup.replaceAll("\u2016", imagePlugin); 473 protectedMarkup = protectedMarkup.replaceAll("caption=''", ""); 474 protectedMarkup = protectedMarkup.replaceAll("\\s+", " "); 475 476 final int pos = contentCopy.indexOf(temp); 477 contentCopy = contentCopy.substring(0, pos) + protectedMarkup 478 + contentCopy.substring(pos + temp.length()); 479 } 480 return contentCopy; 481 } 482 483 private String replaceArea(final String content, final String markupRegex, final String replaceSource, final String replaceTarget) 484 { 485 final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 486 String contentCopy = content; 487 488 while (matcher.find()) 489 { 490 String protectedMarkup = matcher.group(0); 491 final String temp = protectedMarkup; 492 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget); 493 final int pos = contentCopy.indexOf(temp); 494 contentCopy = contentCopy.substring(0, pos) + protectedMarkup 495 + contentCopy.substring(pos + temp.length() ); 496 } 497 return contentCopy; 498 } 499 500 /** 501 * Protects a specific markup 502 * 503 * @see #protectMarkup(String) 504 */ 505 private String protectMarkup(final String content, final String markupRegex, final String replaceSource, final String replaceTarget) 506 { 507 final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 508 final StringBuffer result = new StringBuffer(); 509 while (matcher.find()) 510 { 511 String protectedMarkup = matcher.group(); 512 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget); 513 try 514 { 515 final MessageDigest digest = MessageDigest.getInstance("MD5"); 516 digest.reset(); 517 digest.update(protectedMarkup.getBytes(StandardCharsets.UTF_8.name())); 518 final String hash = bytesToHash(digest.digest()); 519 matcher.appendReplacement(result, hash); 520 c_protectionMap.put(hash, protectedMarkup); 521 m_hashList.add(hash); 522 } 523 catch (final NoSuchAlgorithmException e) 524 { 525 // FIXME: Should log properly 526 e.printStackTrace(); 527 } 528 catch (final UnsupportedEncodingException e) 529 { 530 // FIXME: Auto-generated catch block 531 e.printStackTrace(); 532 } 533 } 534 matcher.appendTail(result); 535 return result.toString(); 536 } 537 538 private String bytesToHash(final byte[] b) 539 { 540 final StringBuilder hash = new StringBuilder(); 541 for (int i = 0; i < b.length; i++) 542 { 543 hash.append(Integer.toString((b[i] & 0xff) + 0x100, 16).substring(1)); 544 } 545 return hash.toString(); 546 } 547 548 private String translateElement(final String content, final String fromMarkup, final String toMarkup) 549 { 550 final Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content); 551 final StringBuffer result = new StringBuffer(); 552 553 while (matcher.find()) 554 { 555 matcher.appendReplacement(result, toMarkup); 556 } 557 matcher.appendTail(result); 558 return result.toString(); 559 } 560}