001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import java.io.UnsupportedEncodingException; 022import java.nio.charset.StandardCharsets; 023import java.security.MessageDigest; 024import java.security.NoSuchAlgorithmException; 025import java.text.SimpleDateFormat; 026import java.util.ArrayList; 027import java.util.Calendar; 028import java.util.HashMap; 029import java.util.Map; 030import java.util.Properties; 031import java.util.Set; 032import java.util.regex.Matcher; 033import java.util.regex.Pattern; 034 035/** 036 * <p>Translates Creole markp to JSPWiki markup. Simple translator uses regular expressions. 037 * See http://www.wikicreole.org for the WikiCreole spec.</p> 038 * 039 * <p>This translator can be configured through properties defined in 040 * jspwiki.properties starting with "creole.*". See the 041 * jspwiki.properties file for an explanation of the properties</p> 042 * 043 * <p><b>WARNING</b>: This is an experimental feature, and known to be 044 * broken. Use at your own risk.</o> 045 * 046 * 047 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a> 048 */ 049public class CreoleToJSPWikiTranslator 050{ 051 052 // These variables are expanded so that admins 053 // can display information about the current installed 054 // pagefilter 055 // 056 // The syntax is the same as a wiki var. Unlike a wiki 057 // war though, the CreoleTranslator itself 058 // 059 // [{$creolepagefilter.version}] 060 // [{$creolepagefilter.creoleversion}] 061 // [{$creolepagefilter.linebreak}] -> bloglike/wikilike 062 063 /** The version of the filter. */ 064 public static final String VAR_VERSION = "1.0.3"; 065 066 /** The version of Creole that this filter supports. */ 067 public static final String VAR_CREOLE_VERSION = "1.0"; 068 069 /** The linebreak style "bloglike". */ 070 public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike"; 071 072 /** The linebreak style "c2like". */ 073 public static final String VAR_LINEBREAK_C2LIKE = "c2like"; 074 075 private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))"; 076 077 private static final String JSPWIKI_BOLD = "__$1__$3"; 078 079 private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))"; 080 081 private static final String JSPWIKI_ITALIC = "''$1''$3"; 082 083 private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]"; 084 085 private static final String JSPWIKI_SIMPLELINK = "[$1]"; 086 087 private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]"; 088 089 private static final String JSPWIKI_LINK = "[$2|$1]"; 090 091 private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}"; 092 093 private static final String JSPWIKI_HEADER_0 = "$1!!!$2"; 094 095 private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}"; 096 097 private static final String JSPWIKI_HEADER_1 = "$1!!!$2"; 098 099 private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}"; 100 101 private static final String JSPWIKI_HEADER_2 = "$1!!$2"; 102 103 private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}"; 104 105 private static final String JSPWIKI_HEADER_3 = "$1!$2"; 106 107 private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}"; 108 109 private static final String JSPWIKI_HEADER_4 = "$1__$2__"; 110 111 private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}"; 112 113 private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]"; 114 115 private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}"; 116 117 private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]"; 118 119 private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]"; 120 121 private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]"; 122 123 private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]"; 124 125 private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]"; 126 127 private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E"; 128 129 //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])"; 130 131 //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2"; 132 133 private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)"; 134 135 private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>"; 136 137 private static final String JSPWIKI_PLUGIN = "[{$1}]"; 138 139 private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])"; 140 141 private static final String HTTP_URL = "$1http://$2$3"; 142 143 private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}"; 144 145 private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]"; 146 147 private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]"; 148 149 private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]"; 150 151 private static final String JSPWIKI_TABLE = "$1$2$4"; 152 153 /* TODO Is it possible to use just protect :// ? */ 154 private static final String URL_PROTECTED = "http://|ftp://|https://"; 155 156 private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))"; 157 158 private static final String SIGNATURE = "--~~~"; 159 160 private static final String SIGNATURE_AND_DATE = "--~~~~"; 161 162 private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd"; 163 164 private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)"; 165 166 private static final Map<String, String> c_protectionMap = new HashMap<>(); 167 168 private ArrayList<String> m_hashList = new ArrayList<>(); 169 170 /** 171 * I have no idea what this method does. Could someone please tell me? 172 * 173 * @param wikiProps A property set 174 * @param content The content to translate? 175 * @param username The username in the signature? 176 * @return Probably some translated content. 177 */ 178 public String translateSignature(final Properties wikiProps, final String content, final String username) 179 { 180 181 String dateFormat = wikiProps.getProperty("creole.dateFormat"); 182 183 if (dateFormat == null) 184 { 185 dateFormat = DEFAULT_DATEFORMAT; 186 } 187 188 SimpleDateFormat df; 189 try 190 { 191 df = new SimpleDateFormat(dateFormat); 192 } 193 catch (final Exception e) 194 { 195 e.printStackTrace(); 196 df = new SimpleDateFormat(DEFAULT_DATEFORMAT); 197 } 198 199 String result = content; 200 result = protectMarkup(result, PREFORMATTED_PROTECTED, "", ""); 201 result = protectMarkup(result, URL_PROTECTED, "", ""); 202 203 final Calendar cal = Calendar.getInstance(); 204 result = translateElement(result, SIGNATURE_AND_DATE, "-- [[" + username + "]], " + df.format(cal.getTime())); 205 result = translateElement(result, SIGNATURE, "-- [[" + username + "]]"); 206 result = unprotectMarkup(result, false); 207 return result; 208 } 209 210 /** 211 * Translates Creole markup to JSPWiki markup 212 * 213 * @param wikiProps A set of Wiki Properties 214 * @param content Creole markup 215 * @return Wiki markup 216 */ 217 public String translate(final Properties wikiProps, final String content) 218 { 219 final boolean blogLineBreaks = false; 220 /* 221 // BROKEN, breaks on different platforms. 222 String tmp = wikiProps.getProperty("creole.blogLineBreaks"); 223 if (tmp != null) 224 { 225 if (tmp.trim().equals("true")) 226 blogLineBreaks = true; 227 } 228 */ 229 final String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name"); 230 231 String result = content; 232 // 233 // Breaks on OSX. It is never a good idea to tamper with the linebreaks. JSPWiki always 234 // stores linebreaks as \r\n, regardless of the platform. 235 //result = result.replace("\r\n", "\n"); 236 //result = result.replace("\r", "\n"); 237 238 /* Now protect the rest */ 239 result = protectMarkup(result); 240 result = translateLists(result, "*", "-", "Nothing"); 241 result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD); 242 result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC); 243 result = translateElement(result, WWW_URL, HTTP_URL); 244 245 if (imagePlugin != null && !imagePlugin.equals("")) 246 { 247 result = this.replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin); 248 result = this.replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin); 249 } 250 result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC); 251 result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK); 252 result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK); 253 result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK); 254 result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4); 255 result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3); 256 result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2); 257 result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1); 258 result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0); 259 result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE); 260 result = translateLists(result, "-", "*", "#"); 261 result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE); 262 result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE); 263 result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2"); 264 265 /* 266 if (blogLineBreaks) 267 { 268 result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS); 269 } 270 */ 271 result = unprotectMarkup(result, true); 272 273 result = translateVariables(result, blogLineBreaks); 274 //result = result.replace("\n", System.getProperty("line.separator")); 275 return result; 276 } 277 278 /** Translates lists. */ 279 private static String translateLists(final String content, final String sourceSymbol, final String targetSymbol, final String sourceSymbol2) 280 { 281 final String[] lines = content.split("\n"); 282 final StringBuilder result = new StringBuilder(); 283 int counter = 0; 284 int inList = -1; 285 for (int i = 0; i < lines.length; i++) 286 { 287 String line = lines[i]; 288 String actSourceSymbol = ""; 289 while ((line.startsWith(sourceSymbol) || line.startsWith(sourceSymbol2)) 290 && (actSourceSymbol.equals("") || line.substring(0, 1).equals(actSourceSymbol))) 291 { 292 actSourceSymbol = line.substring(0, 1); 293 line = line.substring( 1 ); 294 counter++; 295 } 296 if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter)) 297 { 298 for (int c = 0; c < counter; c++) 299 { 300 result.append(actSourceSymbol); 301 } 302 inList = -1; 303 } 304 else 305 { 306 for (int c = 0; c < counter; c++) 307 { 308 if (actSourceSymbol.equals(sourceSymbol2)) 309 { 310 result.append(sourceSymbol2); 311 } 312 else 313 { 314 result.append(targetSymbol); 315 } 316 } 317 inList = counter; 318 } 319 result.append(line); 320 if (i < lines.length - 1) 321 { 322 result.append("\n"); 323 } 324 counter = 0; 325 } 326 327 // Fixes testExtensions5 328 if( content.endsWith( "\n" ) && result.charAt( result.length()-1 ) != '\n' ) 329 { 330 result.append( '\n' ); 331 } 332 333 return result.toString(); 334 } 335 336 private String translateVariables(String result, final boolean blogLineBreaks) 337 { 338 result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION); 339 result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION); 340 final String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE; 341 result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks); 342 return result; 343 } 344 345 /** 346 * Undoes the protection. This is done by replacing the md5 hashes by the 347 * original markup. 348 * 349 * @see #protectMarkup(String) 350 */ 351 private String unprotectMarkup(String content, final boolean replacePlugins) 352 { 353 final Object[] it = this.m_hashList.toArray(); 354 355 for (int i = it.length - 1; i >= 0; i--) 356 { 357 final String hash = (String) it[i]; 358 final String protectedMarkup = c_protectionMap.get(hash); 359 content = content.replace(hash, protectedMarkup); 360 if ((protectedMarkup.length() < 3 || (protectedMarkup.length() > 2 && 361 !protectedMarkup.startsWith("{{{")))&&replacePlugins) 362 content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN); 363 364 } 365 return content; 366 } 367 368 /** 369 * Protects markup that should not be processed. For now this includes: 370 * <ul> 371 * <li>Preformatted sections, they should be ignored</li> 372 * </li> 373 * <li>Protocol strings like <code>http://</code>, they cause problems 374 * because of the <code>//</code> which is interpreted as italic</li> 375 * </ul> 376 * This protection is a simple method to keep the regular expressions for 377 * the other markup simple. Internally the protection is done by replacing 378 * the protected markup with the the md5 hash of the markup. 379 * 380 * @param content 381 * @return The content with protection 382 */ 383 private String protectMarkup(String content) 384 { 385 c_protectionMap.clear(); 386 m_hashList = new ArrayList<>(); 387 content = protectMarkup(content, PREFORMATTED_PROTECTED, "", ""); 388 content = protectMarkup(content, URL_PROTECTED, "", ""); 389 content = protectMarkup(content, ESCAPE_PROTECTED, "", ""); 390 content = protectMarkup(content, CREOLE_PLUGIN, "", ""); 391 392 // content = protectMarkup(content, LINE_PROTECTED); 393 // content = protectMarkup(content, SIGNATURE_PROTECTED); 394 return content; 395 } 396 397 private ArrayList< String[] > readPlaceholderProperties(final Properties wikiProps) 398 { 399 final Set< Object > keySet = wikiProps.keySet(); 400 final Object[] keys = keySet.toArray(); 401 final ArrayList<String[]> result = new ArrayList<>(); 402 403 for (final Object o : keys) { 404 final String key = o + ""; 405 final String value = wikiProps.getProperty(o + ""); 406 if (key.contains("creole.imagePlugin.para.%")) { 407 final String[] pair = new String[2]; 408 pair[0] = key.replaceAll("creole.imagePlugin.para.%", ""); 409 pair[1] = value; 410 result.add(pair); 411 } 412 } 413 return result; 414 } 415 416 private String replaceImageArea(final Properties wikiProps, final String content, final String markupRegex, final String replaceContent, final int groupPos, 417 final String imagePlugin) 418 { 419 final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 420 String contentCopy = content; 421 422 final ArrayList< String[] > plProperties = readPlaceholderProperties(wikiProps); 423 424 while (matcher.find()) 425 { 426 String protectedMarkup = matcher.group(0); 427 final String paramsField = matcher.group(groupPos); 428 final StringBuilder paramsString = new StringBuilder(); 429 430 if (paramsField != null) 431 { 432 final String[] params = paramsField.split(","); 433 434 for (final String s : params) { 435 final String param = s.replaceAll("\\||\\s", "").toUpperCase(); 436 437 // Replace placeholder params 438 for (final String[] pair : plProperties) { 439 final String key = pair[0]; 440 final String value = pair[1]; 441 String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>"); 442 code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2"); 443 if (!code.equals(param)) { 444 paramsString.append(code); 445 } 446 } 447 448 // Check if it is a number 449 try { 450 Integer.parseInt(param); 451 paramsString.append(" width='").append(param).append("px'"); 452 } catch (final Exception e) { 453 454 if (wikiProps.getProperty("creole.imagePlugin.para." + param) != null) 455 paramsString.append(" ").append(wikiProps.getProperty("creole.imagePlugin.para." + param) 456 .replaceAll("^(\"|')(.*)(\"|')$", "$2")); 457 } 458 } 459 } 460 final String temp = protectedMarkup; 461 462 protectedMarkup = translateElement(protectedMarkup, markupRegex, replaceContent); 463 protectedMarkup = protectedMarkup.replaceAll("\u2015", paramsString.toString()); 464 protectedMarkup = protectedMarkup.replaceAll("\u2016", imagePlugin); 465 protectedMarkup = protectedMarkup.replaceAll("caption=''", ""); 466 protectedMarkup = protectedMarkup.replaceAll("\\s+", " "); 467 468 final int pos = contentCopy.indexOf(temp); 469 contentCopy = contentCopy.substring(0, pos) + protectedMarkup 470 + contentCopy.substring(pos + temp.length()); 471 } 472 return contentCopy; 473 } 474 475 private String replaceArea(final String content, final String markupRegex, final String replaceSource, final String replaceTarget) 476 { 477 final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 478 String contentCopy = content; 479 480 while (matcher.find()) 481 { 482 String protectedMarkup = matcher.group(0); 483 final String temp = protectedMarkup; 484 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget); 485 final int pos = contentCopy.indexOf(temp); 486 contentCopy = contentCopy.substring(0, pos) + protectedMarkup 487 + contentCopy.substring(pos + temp.length() ); 488 } 489 return contentCopy; 490 } 491 492 /** 493 * Protects a specific markup 494 * 495 * @see #protectMarkup(String) 496 */ 497 private String protectMarkup(final String content, final String markupRegex, final String replaceSource, final String replaceTarget) 498 { 499 final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content); 500 final StringBuffer result = new StringBuffer(); 501 while (matcher.find()) 502 { 503 String protectedMarkup = matcher.group(); 504 protectedMarkup = protectedMarkup.replaceAll(replaceSource, replaceTarget); 505 try 506 { 507 final MessageDigest digest = MessageDigest.getInstance("MD5"); 508 digest.reset(); 509 digest.update(protectedMarkup.getBytes(StandardCharsets.UTF_8.name())); 510 final String hash = bytesToHash(digest.digest()); 511 matcher.appendReplacement(result, hash); 512 c_protectionMap.put(hash, protectedMarkup); 513 m_hashList.add(hash); 514 } 515 catch (final NoSuchAlgorithmException e) 516 { 517 // FIXME: Should log properly 518 e.printStackTrace(); 519 } 520 catch (final UnsupportedEncodingException e) 521 { 522 // FIXME: Auto-generated catch block 523 e.printStackTrace(); 524 } 525 } 526 matcher.appendTail(result); 527 return result.toString(); 528 } 529 530 private String bytesToHash(final byte[] b) 531 { 532 final StringBuilder hash = new StringBuilder(); 533 for (final byte value : b) { 534 hash.append(Integer.toString((value & 0xff) + 0x100, 16).substring(1)); 535 } 536 return hash.toString(); 537 } 538 539 private String translateElement(final String content, final String fromMarkup, final String toMarkup) 540 { 541 final Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content); 542 final StringBuffer result = new StringBuffer(); 543 544 while (matcher.find()) 545 { 546 matcher.appendReplacement(result, toMarkup); 547 } 548 matcher.appendTail(result); 549 return result.toString(); 550 } 551}