/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.parser;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>Translates Creole markup to JSPWiki markup. Simple translator uses regular expressions.
 * See http://www.wikicreole.org for the WikiCreole spec.</p>
 *
 * <p>This translator can be configured through properties defined in
 * jspwiki.properties starting with "creole.*". See the
 * jspwiki.properties file for an explanation of the properties</p>
 *
 * <p>An instance keeps the protection state (placeholder hashes) of the translation
 * that is currently running in instance fields without any synchronization, so a single
 * instance must not be used by several threads at the same time. Separate instances do
 * not share any mutable state.</p>
 *
 * <p><b>WARNING</b>: This is an experimental feature, and known to be
 * broken. Use at your own risk.</p>
 *
 * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
 */
public class CreoleToJSPWikiTranslator
{

    // These variables are expanded so that admins
    // can display information about the current installed
    // pagefilter
    //
    // The syntax is the same as a wiki var. Unlike a wiki
    // var though, the CreoleTranslator itself expands them
    // (see translateVariables).
    //
    // [{$creolepagefilter.version}]
    // [{$creolepagefilter.creoleversion}]
    // [{$creolepagefilter.linebreak}] -> bloglike/c2like

    /** The version of the filter. */
    public static final String VAR_VERSION = "1.0.3";

    /** The version of Creole that this filter supports. */
    public static final String VAR_CREOLE_VERSION = "1.0";

    /** The linebreak style "bloglike". */
    public static final String VAR_LINEBREAK_BLOGLIKE = "bloglike";

    /** The linebreak style "c2like". */
    public static final String VAR_LINEBREAK_C2LIKE = "c2like";

    private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";

    private static final String JSPWIKI_BOLD = "__$1__$3";

    private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";

    private static final String JSPWIKI_ITALIC = "''$1''$3";

    private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";

    private static final String JSPWIKI_SIMPLELINK = "[$1]";

    private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";

    private static final String JSPWIKI_LINK = "[$2|$1]";

    private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";

    private static final String JSPWIKI_HEADER_0 = "$1!!!$2";

    private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";

    private static final String JSPWIKI_HEADER_1 = "$1!!!$2";

    private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";

    private static final String JSPWIKI_HEADER_2 = "$1!!$2";

    private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";

    private static final String JSPWIKI_HEADER_3 = "$1!$2";

    private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";

    private static final String JSPWIKI_HEADER_4 = "$1__$2__";

    private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";

    private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";

    private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";

    private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";

    private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";

    private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";

    private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";

    private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";

    private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";

    //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])";

    //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2";

    private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";

    private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";

    private static final String JSPWIKI_PLUGIN = "[{$1}]";

    private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";

    private static final String HTTP_URL = "$1http://$2$3";

    private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";

    // \u2016 is a placeholder for the image plugin name, \u2015 for the extra plugin parameters.
    private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";

    private static final String CREOLE_LINK_IMAG_X = "\\[\\[([^|]*)\\|\\{\\{([^|]*)((\\|)([^|]*)){0,1}((\\|)([^}]*)){0,1}\\}\\}\\]\\]";

    private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";

    private static final String JSPWIKI_TABLE = "$1$2$4";

    /* TODO Is it possible to use just protect :// ? */
    private static final String URL_PROTECTED = "http://|ftp://|https://";

    private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";

    private static final String SIGNATURE = "--~~~";

    private static final String SIGNATURE_AND_DATE = "--~~~~";

    private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";

    private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";

    /**
     * Maps a placeholder hash to the markup it stands for. Kept per instance: a map shared
     * between instances would be wiped by every other translation that starts in between.
     */
    private final Map<String, String> m_protectionMap = new HashMap<>();

    /** The placeholder hashes of the running translation, in the order they were created. */
    private final List<String> m_hashList = new ArrayList<>();

    /**
     * Expands the Creole signature markers in the content. <code>--~~~~</code> becomes
     * <code>-- [[username]], date</code> and <code>--~~~</code> becomes <code>-- [[username]]</code>.
     * Preformatted sections (<code>{{{ ... }}}</code>) and protocol prefixes such as
     * <code>http://</code> are protected while the markers are replaced.
     * The date is the current date formatted with the pattern given in the property
     * <code>creole.dateFormat</code>; if the property is missing or is not a valid
     * {@link SimpleDateFormat} pattern, <code>yyyy-MM-dd</code> is used.
     *
     * @param wikiProps A property set
     * @param content The content in which the signature markers are replaced
     * @param username The username to put into the signature; it is inserted literally
     * @return The content with the signature markers expanded
     */
    public String translateSignature(final Properties wikiProps, final String content, final String username)
    {
        String dateFormat = wikiProps.getProperty("creole.dateFormat");
        if (dateFormat == null)
        {
            dateFormat = DEFAULT_DATEFORMAT;
        }

        SimpleDateFormat df;
        try
        {
            df = new SimpleDateFormat(dateFormat);
        }
        catch (final IllegalArgumentException e)
        {
            // The configured pattern is not valid; fall back to the default one.
            e.printStackTrace();
            df = new SimpleDateFormat(DEFAULT_DATEFORMAT);
        }

        // Start from a clean slate so that placeholders left over from an earlier call
        // on this instance are never looked up again.
        resetProtection();

        String result = content;
        result = protectMarkup(result, PREFORMATTED_PROTECTED, "", "");
        result = protectMarkup(result, URL_PROTECTED, "", "");

        final Calendar cal = Calendar.getInstance();
        // The user name and the formatted date are data, not replacement templates: quote them
        // so that '$' and '\' are inserted literally instead of being read as group references.
        final String signature = "-- [[" + username + "]]";
        final String signatureAndDate = signature + ", " + df.format(cal.getTime());
        result = translateElement(result, SIGNATURE_AND_DATE, Matcher.quoteReplacement(signatureAndDate));
        result = translateElement(result, SIGNATURE, Matcher.quoteReplacement(signature));
        result = unprotectMarkup(result, false);
        return result;
    }

    /**
     * Translates Creole markup to JSPWiki markup
     *
     * @param wikiProps A set of Wiki Properties
     * @param content Creole markup
     * @return Wiki markup
     */
    public String translate(final Properties wikiProps, final String content)
    {
        // The property "creole.blogLineBreaks" used to switch this on, but the line break
        // translation is BROKEN (breaks on different platforms), so it is permanently disabled.
        final boolean blogLineBreaks = false;

        final String imagePlugin = wikiProps.getProperty("creole.imagePlugin.name");

        String result = content;
        //
        // Do not normalize the line breaks here: that breaks on OSX. It is never a good idea
        // to tamper with the linebreaks. JSPWiki always stores linebreaks as \r\n, regardless
        // of the platform.
        //

        /* Now protect the rest */
        result = protectMarkup(result);
        // First pass: turn "*" list markers into "-" so that they cannot be taken for bold markup.
        result = translateLists(result, "*", "-", null);
        result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
        result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
        result = translateElement(result, WWW_URL, HTTP_URL);

        if (imagePlugin != null && !imagePlugin.isEmpty())
        {
            result = replaceImageArea(wikiProps, result, CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6, imagePlugin);
            result = replaceImageArea(wikiProps, result, CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
        }
        result = translateElement(result, CREOLE_IMAGE_LINK_DESC, JSPWIKI_IMAGE_LINK_DESC);
        result = translateElement(result, CREOLE_IMAGE_LINK, JSPWIKI_IMAGE_LINK);
        result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
        result = translateElement(result, CREOLE_SIMPLELINK, JSPWIKI_SIMPLELINK);
        // Longest header markers first, otherwise "=" would already match "=====".
        result = translateElement(result, CREOLE_HEADER_4, JSPWIKI_HEADER_4);
        result = translateElement(result, CREOLE_HEADER_3, JSPWIKI_HEADER_3);
        result = translateElement(result, CREOLE_HEADER_2, JSPWIKI_HEADER_2);
        result = translateElement(result, CREOLE_HEADER_1, JSPWIKI_HEADER_1);
        result = translateElement(result, CREOLE_HEADER_0, JSPWIKI_HEADER_0);
        result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
        // Second pass: turn the temporary "-" markers into JSPWiki "*" markers, keep "#" markers.
        result = translateLists(result, "-", "*", "#");
        result = translateElement(result, CREOLE_SIMPLEIMAGE, JSPWIKI_SIMPLEIMAGE);
        result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
        result = replaceArea(result, TABLE_HEADER_PROTECTED, "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");

        result = unprotectMarkup(result, true);

        result = translateVariables(result, blogLineBreaks);
        return result;
    }

    /**
     * Translates the list markers at the start of each line.
     * A run of <code>sourceSymbol</code> characters is rewritten to the same number of
     * <code>targetSymbol</code> characters, a run of <code>sourceSymbol2</code> characters is
     * kept as it is. A run is left untouched when it does not look like a list item:
     * outside a list only a run of length one starts a list, inside a list the run may be
     * at most one character longer than the run of the previous line.
     *
     * @param content The content to translate
     * @param sourceSymbol The list marker to replace
     * @param targetSymbol The marker that replaces <code>sourceSymbol</code>
     * @param sourceSymbol2 A second list marker that is recognized but kept,
     *                      or <code>null</code> if there is none
     * @return The content with translated list markers
     */
    private static String translateLists(final String content, final String sourceSymbol, final String targetSymbol, final String sourceSymbol2)
    {
        // Note: split() drops trailing empty strings, see the fix-up after the loop.
        final String[] lines = content.split("\n");
        final StringBuilder result = new StringBuilder();
        int inList = -1;
        for (int i = 0; i < lines.length; i++)
        {
            String line = lines[i];
            String actSourceSymbol = "";
            int counter = 0;
            // Strip the leading run of one and the same list marker and count its length.
            while ((line.startsWith(sourceSymbol) || (sourceSymbol2 != null && line.startsWith(sourceSymbol2)))
                   && (actSourceSymbol.isEmpty() || line.substring(0, 1).equals(actSourceSymbol)))
            {
                actSourceSymbol = line.substring(0, 1);
                line = line.substring(1);
                counter++;
            }
            if ((inList == -1 && counter != 1) || (inList != -1 && inList + 1 < counter))
            {
                // Not a list item: put the stripped characters back unchanged.
                result.append(actSourceSymbol.repeat(Math.max(0, counter)));
                inList = -1;
            }
            else
            {
                final String symbol = actSourceSymbol.equals(sourceSymbol2) ? sourceSymbol2 : targetSymbol;
                for (int c = 0; c < counter; c++)
                {
                    result.append(symbol);
                }
                inList = counter;
            }
            result.append(line);
            if (i < lines.length - 1)
            {
                result.append("\n");
            }
        }

        // Fixes testExtensions5: restore the trailing newline swallowed by split().
        // The result is empty when the content consists of newlines only.
        if (content.endsWith("\n") && (result.length() == 0 || result.charAt(result.length() - 1) != '\n'))
        {
            result.append('\n');
        }

        return result.toString();
    }

    /**
     * Expands the <code>[{$creolepagefilter.*}]</code> variables.
     *
     * @param result The content in which the variables are expanded
     * @param blogLineBreaks Selects the value of the <code>linebreak</code> variable
     * @return The content with the variables replaced by their values
     */
    private String translateVariables(String result, final boolean blogLineBreaks)
    {
        result = result.replace("[{$creolepagefilter.version}]", VAR_VERSION);
        result = result.replace("[{$creolepagefilter.creoleversion}]", VAR_CREOLE_VERSION);
        final String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE : VAR_LINEBREAK_C2LIKE;
        result = result.replace("[{$creolepagefilter.linebreak}]", linebreaks);
        return result;
    }

    /**
     * Undoes the protection. This is done by replacing the md5 hashes by the
     * original markup, last protected markup first.
     *
     * @param content The content containing the hashes
     * @param replacePlugins If <code>true</code>, Creole plugin markup is translated to JSPWiki
     *                       plugin markup each time markup other than a preformatted section
     *                       has been restored
     * @return The content with the original markup restored
     * @see #protectMarkup(String)
     */
    private String unprotectMarkup(String content, final boolean replacePlugins)
    {
        for (int i = m_hashList.size() - 1; i >= 0; i--)
        {
            final String hash = m_hashList.get(i);
            final String protectedMarkup = m_protectionMap.get(hash);
            if (protectedMarkup == null)
            {
                // Defensive: hashes and map entries are always stored together.
                continue;
            }
            content = content.replace(hash, protectedMarkup);
            // Preformatted sections are restored last and must stay untouched.
            if (replacePlugins && !protectedMarkup.startsWith("{{{"))
            {
                content = translateElement(content, CREOLE_PLUGIN, JSPWIKI_PLUGIN);
            }
        }
        return content;
    }

    /**
     * Protects markup that should not be processed. For now this includes:
     * <ul>
     * <li>Preformatted sections, they should be ignored</li>
     * <li>Protocol strings like <code>http://</code>, they cause problems
     * because of the <code>//</code> which is interpreted as italic</li>
     * <li>Markup escaped with <code>~</code></li>
     * <li>Plugin markup (<code>&lt;&lt; ... &gt;&gt;</code>)</li>
     * </ul>
     * This protection is a simple method to keep the regular expressions for
     * the other markup simple. Internally the protection is done by replacing
     * the protected markup with the md5 hash of the markup.
     *
     * @param content The content to protect
     * @return The content with protection
     */
    private String protectMarkup(String content)
    {
        resetProtection();
        content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
        content = protectMarkup(content, URL_PROTECTED, "", "");
        content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
        content = protectMarkup(content, CREOLE_PLUGIN, "", "");
        return content;
    }

    /** Forgets all placeholders created by an earlier translation on this instance. */
    private void resetProtection()
    {
        m_protectionMap.clear();
        m_hashList.clear();
    }

    /**
     * Collects the placeholder parameters of the image plugin, that is all properties whose
     * key contains <code>creole.imagePlugin.para.%</code>.
     *
     * @param wikiProps The properties to search
     * @return A list of pairs: the key without the <code>creole.imagePlugin.para.%</code> part
     *         and the value of the property
     */
    private List<String[]> readPlaceholderProperties(final Properties wikiProps)
    {
        final String marker = "creole.imagePlugin.para.%";
        final Set<Object> keySet = wikiProps.keySet();
        final List<String[]> result = new ArrayList<>();

        for (final Object o : keySet)
        {
            final String key = String.valueOf(o);
            if (key.contains(marker))
            {
                // The marker is plain text, not a regular expression.
                result.add(new String[] { key.replace(marker, ""), wikiProps.getProperty(key) });
            }
        }
        return result;
    }

    /**
     * Translates image markup into a call of the configured image plugin.
     * Every match of <code>markupRegex</code> is translated with <code>replaceContent</code>;
     * in the outcome the placeholder \u2016 is replaced by the name of the plugin and the
     * placeholder \u2015 by the plugin parameters derived from the parameter field of the markup.
     * An empty caption is dropped and whitespace is collapsed to single blanks.
     *
     * @param wikiProps The properties defining the plugin parameters
     * @param content The content to translate
     * @param markupRegex The regular expression matching the image markup
     * @param replaceContent The replacement template for the image markup
     * @param groupPos The group of <code>markupRegex</code> that holds the parameter field
     * @param imagePlugin The name of the image plugin
     * @return The content with the image markup translated
     */
    private String replaceImageArea(final Properties wikiProps, final String content, final String markupRegex, final String replaceContent, final int groupPos,
                                    final String imagePlugin)
    {
        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        final List<String[]> plProperties = readPlaceholderProperties(wikiProps);
        final StringBuilder result = new StringBuilder();

        while (matcher.find())
        {
            final String params = buildImageParameters(wikiProps, plProperties, matcher.group(groupPos));

            String markup = translateElement(matcher.group(0), markupRegex, replaceContent);
            // Placeholders and their values are plain text: replace them literally.
            markup = markup.replace("\u2015", params);
            markup = markup.replace("\u2016", imagePlugin);
            markup = markup.replace("caption=''", "");
            markup = markup.replaceAll("\\s+", " ");

            // Replace exactly the matched span, and insert the new markup literally.
            matcher.appendReplacement(result, Matcher.quoteReplacement(markup));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Builds the parameter string of the image plugin from the parameter field of an image markup.
     * The field is split at commas. A parameter consisting of a number becomes a
     * <code>width</code> in pixels, a number followed by a placeholder key is expanded using the
     * value of the placeholder, any other parameter is looked up as property
     * <code>creole.imagePlugin.para.&lt;PARAM&gt;</code> (upper case, surrounding quotes of the
     * value removed).
     *
     * @param wikiProps The properties defining the named parameters
     * @param plProperties The placeholder parameters, see {@link #readPlaceholderProperties(Properties)}
     * @param paramsField The parameter field of the markup, may be <code>null</code>
     * @return The parameter string, empty if there are no parameters
     */
    private String buildImageParameters(final Properties wikiProps, final List<String[]> plProperties, final String paramsField)
    {
        if (paramsField == null)
        {
            return "";
        }

        final StringBuilder paramsString = new StringBuilder();
        for (final String s : paramsField.split(","))
        {
            // Locale.ROOT keeps the property key independent of the default locale.
            final String param = s.replaceAll("\\||\\s", "").toUpperCase(Locale.ROOT);

            // Replace placeholder params
            for (final String[] pair : plProperties)
            {
                final String key = pair[0];
                final String value = pair[1];
                String code = param.replaceAll("(?i)([0-9]+)" + key, value + "<check>" + "$1" + "</check>");
                code = code.replaceAll("(.*?)%(.*?)<check>(.*?)</check>", "$1$3$2");
                if (!code.equals(param))
                {
                    paramsString.append(code);
                }
            }

            // Check if it is a number
            try
            {
                Integer.parseInt(param);
                paramsString.append(" width='").append(param).append("px'");
            }
            catch (final NumberFormatException notANumber)
            {
                final String named = wikiProps.getProperty("creole.imagePlugin.para." + param);
                if (named != null)
                {
                    paramsString.append(" ").append(named.replaceAll("^(\"|')(.*)(\"|')$", "$2"));
                }
            }
        }
        return paramsString.toString();
    }

    /**
     * Applies a replacement only inside the areas matching <code>markupRegex</code>.
     *
     * @param content The content to translate
     * @param markupRegex The regular expression selecting the areas
     * @param replaceSource The regular expression to replace inside each area
     * @param replaceTarget The replacement template for <code>replaceSource</code>
     * @return The content with the replacement applied to the matching areas
     */
    private String replaceArea(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
    {
        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        final StringBuilder result = new StringBuilder();

        while (matcher.find())
        {
            final String replaced = matcher.group().replaceAll(replaceSource, replaceTarget);
            // Replace exactly the matched span, and insert the outcome literally.
            matcher.appendReplacement(result, Matcher.quoteReplacement(replaced));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Protects a specific markup: every match of <code>markupRegex</code> is replaced by the
     * md5 hash of the matched markup, and the hash is remembered for
     * {@link #unprotectMarkup(String, boolean)}.
     *
     * @param content The content to protect
     * @param markupRegex The regular expression matching the markup to protect
     * @param replaceSource A regular expression applied to the matched markup before it is stored
     * @param replaceTarget The replacement template for <code>replaceSource</code>
     * @return The content with the matched markup replaced by hashes; the unchanged content
     *         if MD5 is not available
     * @see #protectMarkup(String)
     */
    private String protectMarkup(final String content, final String markupRegex, final String replaceSource, final String replaceTarget)
    {
        final MessageDigest digest;
        try
        {
            digest = MessageDigest.getInstance("MD5");
        }
        catch (final NoSuchAlgorithmException e)
        {
            // FIXME: Should log properly
            e.printStackTrace();
            return content;
        }

        final Matcher matcher = Pattern.compile(markupRegex, Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
        final StringBuilder result = new StringBuilder();
        while (matcher.find())
        {
            final String protectedMarkup = matcher.group().replaceAll(replaceSource, replaceTarget);
            // digest(byte[]) leaves the digest reset, so it can be reused for the next match.
            final String hash = bytesToHash(digest.digest(protectedMarkup.getBytes(StandardCharsets.UTF_8)));
            // The hash consists of hex digits only, so it is safe as a replacement template.
            matcher.appendReplacement(result, hash);
            m_protectionMap.put(hash, protectedMarkup);
            m_hashList.add(hash);
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Renders bytes as a lower case hex string, two digits per byte.
     *
     * @param b The bytes to render
     * @return The hex string
     */
    private static String bytesToHash(final byte[] b)
    {
        final StringBuilder hash = new StringBuilder(b.length * 2);
        for (final byte value : b)
        {
            // Adding 0x100 forces three hex digits; dropping the first keeps the leading zero.
            hash.append(Integer.toString((value & 0xff) + 0x100, 16).substring(1));
        }
        return hash.toString();
    }

    /**
     * Replaces every match of a regular expression.
     *
     * @param content The content to translate
     * @param fromMarkup The regular expression to replace; compiled with {@link Pattern#MULTILINE}
     * @param toMarkup The replacement template; <code>$n</code> refers to group n of the match,
     *                 so literal text must be quoted with {@link Matcher#quoteReplacement(String)}
     * @return The content with all matches replaced
     */
    private static String translateElement(final String content, final String fromMarkup, final String toMarkup)
    {
        final Matcher matcher = Pattern.compile(fromMarkup, Pattern.MULTILINE).matcher(content);
        final StringBuilder result = new StringBuilder();

        while (matcher.find())
        {
            matcher.appendReplacement(result, toMarkup);
        }
        matcher.appendTail(result);
        return result.toString();
    }
}