001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.util; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.logging.log4j.LogManager; 023import org.apache.logging.log4j.Logger; 024 025import java.io.File; 026import java.io.IOException; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.security.SecureRandom; 030import java.util.NoSuchElementException; 031import java.util.Properties; 032import java.util.Random; 033 034 035/** 036 * Contains a number of static utility methods. 037 */ 038public final class TextUtil { 039 040 private static final Logger LOG = LogManager.getLogger( TextUtil.class ); 041 042 static final String HEX_DIGITS = "0123456789ABCDEF"; 043 044 /** Pick from some letters that won't be easily mistaken for each other to compose passwords. So, for example, omit o, O and 0, or 1, l and L.*/ 045 static final String PWD_BASE = "abcdefghjkmnpqrstuvwxyzABCDEFGHJKMNPQRSTUVWXYZ23456789+@"; 046 047 /** Length of password. {@link #generateRandomPassword() */ 048 public static final int PASSWORD_LENGTH = 8; 049 050 /** Lists all punctuation characters allowed in WikiMarkup. These will not be cleaned away. This is for compatibility for older versions 051 of JSPWiki. */ 052 public static final String LEGACY_CHARS_ALLOWED = "._"; 053 054 /** Lists all punctuation characters allowed in page names. */ 055 public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$"; 056 057 /** Private constructor prevents instantiation. */ 058 private TextUtil() {} 059 060 /** 061 * java.net.URLEncoder.encode() method in JDK < 1.4 is buggy. This duplicates its functionality. 062 * 063 * @param rs the string to encode 064 * @return the URL-encoded string 065 */ 066 static String urlEncode( final byte[] rs ) { 067 final StringBuilder result = new StringBuilder( rs.length * 2 ); 068 069 // Does the URLEncoding. We could use the java.net one, but it does not eat byte[]s. 070 for( final byte r : rs ) { 071 final char c = ( char )r; 072 switch( c ) { 073 case '_': 074 case '.': 075 case '*': 076 case '-': 077 case '/': 078 result.append( c ); 079 break; 080 case ' ': 081 result.append( '+' ); 082 break; 083 default: 084 if( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) || ( c >= '0' && c <= '9' ) ) { 085 result.append( c ); 086 } else { 087 result.append( '%' ); 088 result.append( HEX_DIGITS.charAt( ( c & 0xF0 ) >> 4 ) ); 089 result.append( HEX_DIGITS.charAt( c & 0x0F ) ); 090 } 091 } 092 } 093 094 return result.toString(); 095 } 096 097 /** 098 * URL encoder does not handle all characters correctly. See <A HREF="http://developer.java.sun.com/developer/bugParade/bugs/4257115.html"> 099 * Bug parade, bug #4257115</A> for more information. 100 * <P> 101 * Thanks to CJB for this fix. 102 * 103 * @param bytes The byte array containing the bytes of the string 104 * @param encoding The encoding in which the string should be interpreted 105 * @return A decoded String 106 * 107 * @throws IllegalArgumentException If the byte array is not a valid string. 108 */ 109 static String urlDecode( final byte[] bytes, final String encoding ) throws IllegalArgumentException { 110 if( bytes == null ) { 111 return null; 112 } 113 114 final byte[] decodeBytes = new byte[ bytes.length ]; 115 int decodedByteCount = 0; 116 117 try { 118 for( int count = 0; count < bytes.length; count++ ) { 119 switch( bytes[count] ) { 120 case '+': 121 decodeBytes[decodedByteCount++] = ( byte ) ' '; 122 break ; 123 124 case '%': 125 decodeBytes[decodedByteCount++] = ( byte )( ( HEX_DIGITS.indexOf( bytes[++count] ) << 4 ) + 126 ( HEX_DIGITS.indexOf( bytes[++count] ) ) ); 127 break ; 128 129 default: 130 decodeBytes[decodedByteCount++] = bytes[count] ; 131 } 132 } 133 134 } catch( final IndexOutOfBoundsException ae ) { 135 throw new IllegalArgumentException( "Malformed UTF-8 string?" ); 136 } 137 138 return new String(decodeBytes, 0, decodedByteCount, Charset.forName( encoding ) ); 139 } 140 141 /** 142 * As java.net.URLEncoder class, but this does it in UTF8 character set. 143 * 144 * @param text The text to decode 145 * @return An URLEncoded string. 146 */ 147 public static String urlEncodeUTF8( final String text ) { 148 // If text is null, just return an empty string 149 if ( text == null ) { 150 return ""; 151 } 152 153 return urlEncode( text.getBytes( StandardCharsets.UTF_8 ) ); 154 } 155 156 /** 157 * As java.net.URLDecoder class, but for UTF-8 strings. null is a safe value and returns null. 158 * 159 * @param utf8 The UTF-8 encoded string 160 * @return A plain, normal string. 161 */ 162 public static String urlDecodeUTF8( final String utf8 ) { 163 if( utf8 == null ) { 164 return null; 165 } 166 167 return urlDecode( utf8.getBytes( StandardCharsets.ISO_8859_1 ), StandardCharsets.UTF_8.toString() ); 168 } 169 170 /** 171 * Provides encoded version of string depending on encoding. Encoding may be UTF-8 or ISO-8859-1 (default). 172 * 173 * <p>This implementation is the same as in FileSystemProvider.mangleName(). 174 * 175 * @param data A string to encode 176 * @param encoding The encoding in which to encode 177 * @return A URL encoded string. 178 */ 179 public static String urlEncode( final String data, final String encoding ) { 180 // Presumably, the same caveats apply as in FileSystemProvider. Don't see why it would be horribly kludgy, though. 181 if( StandardCharsets.UTF_8.toString().equals( encoding ) ) { 182 return urlEncodeUTF8( data ); 183 } 184 185 return urlEncode( data.getBytes( Charset.forName( encoding ) ) ); 186 } 187 188 /** 189 * Provides decoded version of string depending on encoding. Encoding may be UTF-8 or ISO-8859-1 (default). 190 * 191 * <p>This implementation is the same as in FileSystemProvider.unmangleName(). 192 * 193 * @param data The URL-encoded string to decode 194 * @param encoding The encoding to use 195 * @return A decoded string. 196 * @throws IllegalArgumentException If the data cannot be decoded. 197 */ 198 public static String urlDecode( final String data, final String encoding ) throws IllegalArgumentException { 199 // Presumably, the same caveats apply as in FileSystemProvider. Don't see why it would be horribly kludgy, though. 200 if( StandardCharsets.UTF_8.name().equals( encoding ) ) { 201 return urlDecodeUTF8( data ); 202 } 203 204 return urlDecode( data.getBytes( Charset.forName( encoding ) ), encoding ); 205 } 206 207 /** 208 * Replaces the relevant entities inside the String. All & >, <, and " are replaced by their respective names. 209 * 210 * @since 1.6.1 211 * @param src The source string. 212 * @return The encoded string. 213 */ 214 public static String replaceEntities( String src ) { 215 src = replaceString( src, "&", "&" ); 216 src = replaceString( src, "<", "<" ); 217 src = replaceString( src, ">", ">" ); 218 src = replaceString( src, "\"", """ ); 219 220 return src; 221 } 222 223 /** 224 * Replaces a string with another string. 225 * 226 * @param orig Original string. Null is safe. 227 * @param src The string to find. 228 * @param dest The string to replace <I>src</I> with. 229 * @return A string with the replacement done. 230 */ 231 public static String replaceString( final String orig, final String src, final String dest ) { 232 if ( orig == null ) { 233 return null; 234 } 235 if ( src == null || dest == null ) { 236 throw new NullPointerException(); 237 } 238 if ( src.isEmpty() ) { 239 return orig; 240 } 241 242 final StringBuilder res = new StringBuilder( orig.length() + 20 ); // Pure guesswork 243 int start; 244 int end = 0; 245 int last = 0; 246 247 while ( ( start = orig.indexOf( src,end ) ) != -1 ) { 248 res.append( orig, last, start ); 249 res.append( dest ); 250 end = start + src.length(); 251 last = start + src.length(); 252 } 253 res.append( orig.substring( end ) ); 254 255 return res.toString(); 256 } 257 258 /** 259 * Replaces a part of a string with a new String. 260 * 261 * @param start Where in the original string the replacing should start. 262 * @param end Where the replacing should end. 263 * @param orig Original string. Null is safe. 264 * @param text The new text to insert into the string. 265 * @return The string with the orig replaced with text. 266 */ 267 public static String replaceString( final String orig, final int start, final int end, final String text ) { 268 if( orig == null ) { 269 return null; 270 } 271 272 final StringBuilder buf = new StringBuilder( orig ); 273 buf.replace( start, end, text ); 274 return buf.toString(); 275 } 276 277 /** 278 * Replaces a string with another string. Case-insensitive matching is used 279 * 280 * @param orig Original string. Null is safe. 281 * @param src The string to find. 282 * @param dest The string to replace <em>src</em> with. 283 * @return A string with all instances of src replaced with dest. 284 */ 285 public static String replaceStringCaseUnsensitive( final String orig, final String src, final String dest ) { 286 if( orig == null ) { 287 return null; 288 } 289 290 final StringBuilder res = new StringBuilder(); 291 int start; 292 int end = 0; 293 int last = 0; 294 295 final String origCaseUnsn = orig.toLowerCase(); 296 final String srcCaseUnsn = src.toLowerCase(); 297 while( ( start = origCaseUnsn.indexOf( srcCaseUnsn, end ) ) != -1 ) { 298 res.append( orig, last, start ); 299 res.append( dest ); 300 end = start + src.length(); 301 last = start + src.length(); 302 } 303 res.append( orig.substring( end ) ); 304 305 return res.toString(); 306 } 307 308 /** 309 * Parses an integer parameter, returning a default value if the value is null or a non-number. 310 * 311 * @param value The value to parse 312 * @param defvalue A default value in case the value is not a number 313 * @return The parsed value (or defvalue). 314 */ 315 public static int parseIntParameter( final String value, final int defvalue ) { 316 try { 317 return Integer.parseInt( value.trim() ); 318 } catch( final Exception e ) {} 319 320 return defvalue; 321 } 322 323 /** 324 * Gets an integer-valued property from a standard Properties list. 325 * 326 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists we use that value, 327 * if not we check an environment variable with that (almost) same name, almost meaning we replace dots with underscores. 328 * 329 * If the value does not exist, or is a non-integer, returns defVal. 330 * 331 * @since 2.1.48. 332 * @param props The property set to look through 333 * @param key The key to look for 334 * @param defVal If the property is not found or is a non-integer, returns this value. 335 * @return The property value as an integer (or defVal). 336 */ 337 public static int getIntegerProperty( final Properties props, final String key, final int defVal ) { 338 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 339 if( val == null ) { 340 val = props.getProperty( key ); 341 } 342 return parseIntParameter( val, defVal ); 343 } 344 345 /** 346 * Gets a boolean property from a standard Properties list. Returns the default value, in case the key has not been set. 347 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists 348 * we use that value, if not we check an environment variable with that (almost) same name, almost meaning we replace 349 * dots with underscores. 350 * <P> 351 * The possible values for the property are "true"/"false", "yes"/"no", or "on"/"off". Any value not recognized is always defined 352 * as "false". 353 * 354 * @param props A list of properties to search. 355 * @param key The property key. 356 * @param defval The default value to return. 357 * 358 * @return True, if the property "key" was set to "true", "on", or "yes". 359 * 360 * @since 2.0.11 361 */ 362 public static boolean getBooleanProperty( final Properties props, final String key, final boolean defval ) { 363 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 364 if( val == null ) { 365 val = props.getProperty( key ); 366 } 367 if( val == null ) { 368 return defval; 369 } 370 371 return isPositive( val ); 372 } 373 374 /** 375 * Fetches a String property from the set of Properties. This differs from Properties.getProperty() in a 376 * couple of key respects: First, property value is trim()med (so no extra whitespace back and front). 377 * 378 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists 379 * we use that value, if not we check an environment variable with that (almost) same name, almost meaning we replace 380 * dots with underscores. 381 * 382 * @param props The Properties to search through 383 * @param key The property key 384 * @param defval A default value to return, if the property does not exist. 385 * @return The property value. 386 * @since 2.1.151 387 */ 388 public static String getStringProperty( final Properties props, final String key, final String defval ) { 389 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 390 if( val == null ) { 391 val = props.getProperty( key ); 392 } 393 if( val == null ) { 394 return defval; 395 } 396 return val.trim(); 397 } 398 399 /** 400 * {@link #getStringProperty(Properties, String, String)} overload that handles deprecated keys, so that a key and its 401 * deprecated counterpart can coexist in a given version of JSPWiki. 402 * 403 * @param props The Properties to search through 404 * @param key The property key 405 * @param deprecatedKey the property key being superseeded by key 406 * @param defval A default value to return, if the property does not exist. 407 * @return The property value. 408 */ 409 public static String getStringProperty( final Properties props, final String key, final String deprecatedKey, final String defval ) { 410 final String val = getStringProperty( props, deprecatedKey, null ); 411 if( val != null ) { 412 LOG.warn( "{} is being deprecated and will be removed on a future version, please consider using {} instead " + 413 "in your jspwiki[-custom].properties file", deprecatedKey, key ); 414 return val; 415 } 416 return getStringProperty( props, key, defval ); 417 } 418 419 /** 420 * Throws an exception if a property is not found. 421 * 422 * @param props A set of properties to search the key in. 423 * @param key The key to look for. 424 * @return The required property 425 * 426 * @throws NoSuchElementException If the search key is not in the property set. 427 * @since 2.0.26 (on TextUtils, moved To WikiEngine on 2.11.0-M1 and back to TextUtils on 2.11.0-M6) 428 */ 429 public static String getRequiredProperty( final Properties props, final String key ) throws NoSuchElementException { 430 final String value = getStringProperty( props, key, null ); 431 if( value == null ) { 432 throw new NoSuchElementException( "Required property not found: " + key ); 433 } 434 return value; 435 } 436 437 /** 438 * {@link #getRequiredProperty(Properties, String)} overload that handles deprecated keys, so that a key and its 439 * deprecated counterpart can coexist in a given version of JSPWiki. 440 * 441 * @param props The Properties to search through 442 * @param key The property key 443 * @param deprecatedKey the property key being superseeded by key 444 * @return The property value. 445 */ 446 public static String getRequiredProperty( final Properties props, final String key, final String deprecatedKey ) throws NoSuchElementException { 447 final String value = getStringProperty( props, deprecatedKey, null ); 448 if( value == null ) { 449 return getRequiredProperty( props, key ); 450 } 451 LOG.warn( "{} is being deprecated and will be removed on a future version, please consider using {} instead " + 452 "in your jspwiki[-custom].properties file", deprecatedKey, key ); 453 return value; 454 } 455 456 /** 457 * Fetches a file path property from the set of Properties. 458 * 459 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists we use that value, 460 * if not we check an environment variable with that (almost) same name, almost meaning we replace dots with underscores. 461 * 462 * If the implementation fails to create the canonical path it just returns the original value of the property which is a bit doggy. 463 * 464 * @param props The Properties to search through 465 * @param key The property key 466 * @param defval A default value to return, if the property does not exist. 467 * @return the canonical path of the file or directory being referenced 468 * @since 2.10.1 469 */ 470 public static String getCanonicalFilePathProperty( final Properties props, final String key, final String defval ) { 471 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 472 if( val == null ) { 473 val = props.getProperty( key ); 474 } 475 476 if( val == null ) { 477 val = defval; 478 } 479 480 String result; 481 try { 482 result = new File( new File( val.trim() ).getCanonicalPath() ).getAbsolutePath(); 483 } catch( final IOException e ) { 484 result = val.trim(); 485 } 486 return result; 487 } 488 489 /** 490 * Returns true, if the string "val" denotes a positive string. Allowed values are "yes", "on", and "true". 491 * Comparison is case-insignificant. Null values are safe. 492 * 493 * @param val Value to check. 494 * @return True, if val is "true", "on", or "yes"; otherwise false. 495 * 496 * @since 2.0.26 497 */ 498 public static boolean isPositive( String val ) { 499 if( val == null ) { 500 return false; 501 } 502 val = val.trim(); 503 return val.equalsIgnoreCase( "true" ) 504 || val.equalsIgnoreCase( "on" ) 505 || val.equalsIgnoreCase( "yes" ); 506 } 507 508 /** 509 * Makes sure that the POSTed data is conforms to certain rules. These rules are: 510 * <UL> 511 * <LI>The data always ends with a newline (some browsers, such as NS4.x series, does not send a newline at 512 * the end, which makes the diffs a bit strange sometimes. 513 * <LI>The CR/LF/CRLF mess is normalized to plain CRLF. 514 * </UL> 515 * 516 * The reason why we're using CRLF is that most browser already return CRLF since that is the closest thing to an HTTP standard. 517 * 518 * @param postData The data to normalize 519 * @return Normalized data 520 */ 521 public static String normalizePostData( final String postData ) { 522 final StringBuilder sb = new StringBuilder(); 523 for( int i = 0; i < postData.length(); i++ ) { 524 switch( postData.charAt(i) ) { 525 case 0x0a: // LF, UNIX 526 sb.append( "\r\n" ); 527 break; 528 529 case 0x0d: // CR, either Mac or MSDOS 530 sb.append( "\r\n" ); 531 // If it's MSDOS, skip the LF so that we don't add it again. 532 if( i < postData.length() - 1 && postData.charAt( i + 1 ) == 0x0a ) { 533 i++; 534 } 535 break; 536 537 default: 538 sb.append( postData.charAt( i ) ); 539 break; 540 } 541 } 542 543 if( sb.length() < 2 || !sb.substring( sb.length()-2 ).equals( "\r\n" ) ) { 544 sb.append( "\r\n" ); 545 } 546 547 return sb.toString(); 548 } 549 550 private static final int EOI = 0; 551 private static final int LOWER = 1; 552 private static final int UPPER = 2; 553 private static final int DIGIT = 3; 554 private static final int OTHER = 4; 555 private static final Random RANDOM = new SecureRandom(); 556 557 private static int getCharKind( final int c ) { 558 if( c == -1 ) { 559 return EOI; 560 } 561 562 final char ch = ( char )c; 563 564 if( Character.isLowerCase( ch ) ) { 565 return LOWER; 566 } else if( Character.isUpperCase( ch ) ) { 567 return UPPER; 568 } else if( Character.isDigit( ch ) ) { 569 return DIGIT; 570 } else { 571 return OTHER; 572 } 573 } 574 575 /** 576 * Adds spaces in suitable locations of the input string. This is used to transform a WikiName into a more readable format. 577 * 578 * @param s String to be beautified. 579 * @return A beautified string. 580 */ 581 public static String beautifyString( final String s ) { 582 return beautifyString( s, " " ); 583 } 584 585 /** 586 * Adds spaces in suitable locations of the input string. This is used to transform a WikiName into a more readable format. 587 * 588 * @param s String to be beautified. 589 * @param space Use this string for the space character. 590 * @return A beautified string. 591 * @since 2.1.127 592 */ 593 public static String beautifyString( final String s, final String space ) { 594 if( s == null || s.isEmpty() ) { 595 return ""; 596 } 597 598 final StringBuilder result = new StringBuilder(); 599 600 int cur = s.charAt( 0 ); 601 int curKind = getCharKind( cur ); 602 603 int prevKind = LOWER; 604 int nextKind; 605 int next; 606 int nextPos = 1; 607 608 while( curKind != EOI ) { 609 next = ( nextPos < s.length() ) ? s.charAt( nextPos++ ) : -1; 610 nextKind = getCharKind( next ); 611 612 if( ( prevKind == UPPER ) && ( curKind == UPPER ) && ( nextKind == LOWER ) ) { 613 result.append( space ); 614 result.append( ( char ) cur ); 615 } else { 616 result.append((char) cur ); 617 if( ( ( curKind == UPPER ) && (nextKind == DIGIT) ) 618 || ( ( curKind == LOWER ) && ( ( nextKind == DIGIT ) || ( nextKind == UPPER ) ) ) 619 || ( ( curKind == DIGIT ) && ( ( nextKind == UPPER ) || ( nextKind == LOWER ) ) ) ) { 620 result.append( space ); 621 } 622 } 623 prevKind = curKind; 624 cur = next; 625 curKind = nextKind; 626 } 627 628 return result.toString(); 629 } 630 631 /** 632 * Cleans a Wiki name based on a list of characters. Also, any multiple whitespace is collapsed into a single space, and any 633 * leading or trailing space is removed. 634 * 635 * @param text text to be cleared. Null is safe, and causes this to return null. 636 * @param allowedChars Characters which are allowed in the string. 637 * @return A cleaned text. 638 * 639 * @since 2.6 640 */ 641 public static String cleanString( String text, final String allowedChars ) { 642 if( text == null ) { 643 return null; 644 } 645 646 text = text.trim(); 647 final StringBuilder clean = new StringBuilder( text.length() ); 648 649 // Remove non-alphanumeric characters that should not be put inside WikiNames. Note that all valid Unicode letters are 650 // considered okay for WikiNames. It is the problem of the WikiPageProvider to take care of actually storing that information. 651 // 652 // Also capitalize things, if necessary. 653 654 boolean isWord = true; // If true, we've just crossed a word boundary 655 boolean wasSpace = false; 656 for( int i = 0; i < text.length(); i++ ) { 657 char ch = text.charAt( i ); 658 659 // Cleans away repetitive whitespace and only uses the first one. 660 if( Character.isWhitespace( ch ) ) { 661 if( wasSpace ) { 662 continue; 663 } 664 665 wasSpace = true; 666 } else { 667 wasSpace = false; 668 } 669 670 // Check if it is allowed to use this char, and capitalize, if necessary. 671 if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf( ch ) != -1 ) { 672 // Is a letter 673 if( isWord ) { 674 ch = Character.toUpperCase( ch ); 675 } 676 clean.append( ch ); 677 isWord = false; 678 } else { 679 isWord = true; 680 } 681 } 682 683 return clean.toString(); 684 } 685 686 /** 687 * Creates a Properties object based on an array which contains alternatively a key and a value. It is useful 688 * for generating default mappings. For example: 689 * <pre> 690 * String[] properties = { "jspwiki.property1", "value1", "jspwiki.property2", "value2 }; 691 * Properties props = TextUtil.createPropertes( values ); 692 * System.out.println( props.getProperty("jspwiki.property1") ); 693 * </pre> 694 * would output "value1". 695 * 696 * @param values Alternating key and value pairs. 697 * @return Property object 698 * @see java.util.Properties 699 * @throws IllegalArgumentException if the property array is missing a value for a key. 700 * @since 2.2. 701 */ 702 public static Properties createProperties( final String[] values ) throws IllegalArgumentException { 703 if( values.length % 2 != 0 ) { 704 throw new IllegalArgumentException( "One value is missing."); 705 } 706 707 final Properties props = new Properties(); 708 for( int i = 0; i < values.length; i += 2 ) { 709 props.setProperty( values[i], values[i + 1] ); 710 } 711 712 return props; 713 } 714 715 /** 716 * Counts the number of sections (separated with "----") from the page. 717 * 718 * @param pagedata The WikiText to parse. 719 * @return int Number of counted sections. 720 * @since 2.1.86. 721 */ 722 public static int countSections( final String pagedata ) { 723 int tags = 0; 724 int start = 0; 725 726 while( ( start = pagedata.indexOf( "----", start ) ) != -1 ) { 727 tags++; 728 start += 4; // Skip this "----" 729 } 730 731 // The first section does not get the "----" 732 return !pagedata.isEmpty() ? tags + 1 : 0; 733 } 734 735 /** 736 * Gets the given section (separated with "----") from the page text. Note that the first section is always #1. If a page has no 737 * section markers, then there is only a single section, #1. 738 * 739 * @param pagedata WikiText to parse. 740 * @param section Which section to get. 741 * @return String The section. 742 * @throws IllegalArgumentException If the page does not contain this many sections. 743 * @since 2.1.86. 744 */ 745 public static String getSection( final String pagedata, final int section ) throws IllegalArgumentException { 746 int tags = 0; 747 int start = 0; 748 int previous = 0; 749 750 while( ( start = pagedata.indexOf( "----", start ) ) != -1 ) { 751 if( ++tags == section ) { 752 return pagedata.substring( previous, start ); 753 } 754 755 start += 4; // Skip this "----" 756 // allow additional dashes, treat it as if it was a correct 4-dash 757 while (start < pagedata.length() && pagedata.charAt( start ) == '-') { 758 start++; 759 } 760 761 previous = start; 762 } 763 764 if( ++tags == section ) { 765 return pagedata.substring( previous ); 766 } 767 768 throw new IllegalArgumentException( "There is no section no. " + section + " on the page." ); 769 } 770 771 /** 772 * A simple routine which just repeates the arguments. This is useful for creating something like a line or something. 773 * 774 * @param what String to repeat 775 * @param times How many times to repeat the string. 776 * @return Guess what? 777 * @since 2.1.98. 778 */ 779 public static String repeatString( final String what, final int times ) { 780 final StringBuilder sb = new StringBuilder(); 781 for( int i = 0; i < times; i++ ) { 782 sb.append( what ); 783 } 784 785 return sb.toString(); 786 } 787 788 /** 789 * Converts a string from the Unicode representation into something that can be embedded in a java 790 * properties file. All references outside the ASCII range are replaced with \\uXXXX. 791 * 792 * @param s The string to convert 793 * @return the ASCII string 794 */ 795 public static String native2Ascii( final String s ) { 796 final StringBuilder sb = new StringBuilder(); 797 for( int i = 0; i < s.length(); i++ ) { 798 final char aChar = s.charAt(i); 799 if( ( aChar < 0x0020 ) || ( aChar > 0x007e ) ) { 800 sb.append( '\\'); 801 sb.append( 'u'); 802 sb.append( toHex( ( aChar >> 12 ) & 0xF ) ); 803 sb.append( toHex( ( aChar >> 8 ) & 0xF ) ); 804 sb.append( toHex( ( aChar >> 4 ) & 0xF ) ); 805 sb.append( toHex( aChar & 0xF ) ); 806 } else { 807 sb.append( aChar ); 808 } 809 } 810 return sb.toString(); 811 } 812 813 private static char toHex( final int nibble ) { 814 final char[] hexDigit = { 815 '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' 816 }; 817 return hexDigit[ nibble & 0xF ]; 818 } 819 820 /** 821 * Generates a hexadecimal string from an array of bytes. For example, if the array contains 822 * { 0x01, 0x02, 0x3E }, the resulting string will be "01023E". 823 * 824 * @param bytes A Byte array 825 * @return A String representation 826 * @since 2.3.87 827 */ 828 public static String toHexString( final byte[] bytes ) { 829 final StringBuilder sb = new StringBuilder( bytes.length * 2 ); 830 for( final byte aByte : bytes ) { 831 sb.append( toHex( aByte >> 4 ) ); 832 sb.append( toHex( aByte ) ); 833 } 834 835 return sb.toString(); 836 } 837 838 /** 839 * Returns true, if the argument contains a number, otherwise false. In a quick test this is roughly the same 840 * speed as Integer.parseInt() if the argument is a number, and roughly ten times the speed, if the argument 841 * is NOT a number. 842 * 843 * @since 2.4 844 * @param s String to check 845 * @return True, if s represents a number. False otherwise. 846 */ 847 public static boolean isNumber( String s ) { 848 if( s == null ) { 849 return false; 850 } 851 852 if( s.length() > 1 && s.charAt(0) == '-' ) { 853 s = s.substring( 1 ); 854 } 855 856 for( int i = 0; i < s.length(); i++ ) { 857 if( !Character.isDigit( s.charAt( i ) ) ) { 858 return false; 859 } 860 } 861 862 return true; 863 } 864 865 /** 866 * Generate a random String suitable for use as a temporary password. 867 * 868 * @return String suitable for use as a temporary password 869 * @since 2.4 870 */ 871 public static String generateRandomPassword() { 872 final StringBuilder pw = new StringBuilder(); 873 for( int i = 0; i < PASSWORD_LENGTH; i++ ) { 874 final int index = ( int )( RANDOM.nextDouble() * PWD_BASE.length() ); 875 pw.append(PWD_BASE.charAt( index )); 876 } 877 return pw.toString(); 878 } 879 880}