001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.util; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.logging.log4j.LogManager; 023import org.apache.logging.log4j.Logger; 024 025import java.io.File; 026import java.io.IOException; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.security.SecureRandom; 030import java.util.NoSuchElementException; 031import java.util.Properties; 032import java.util.Random; 033import java.util.stream.Collectors; 034import java.util.stream.IntStream; 035 036 037/** 038 * Contains a number of static utility methods. 039 */ 040public final class TextUtil { 041 042 private static final Logger LOG = LogManager.getLogger( TextUtil.class ); 043 044 static final String HEX_DIGITS = "0123456789ABCDEF"; 045 046 /** Pick from some letters that won't be easily mistaken for each other to compose passwords. So, for example, omit o, O and 0, or 1, l and L.*/ 047 static final String PWD_BASE = "abcdefghjkmnpqrstuvwxyzABCDEFGHJKMNPQRSTUVWXYZ23456789+@"; 048 049 /** Length of password. {@link #generateRandomPassword() */ 050 public static final int PASSWORD_LENGTH = 8; 051 052 /** Lists all punctuation characters allowed in WikiMarkup. These will not be cleaned away. This is for compatibility for older versions 053 of JSPWiki. */ 054 public static final String LEGACY_CHARS_ALLOWED = "._"; 055 056 /** Lists all punctuation characters allowed in page names. */ 057 public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$"; 058 059 /** Private constructor prevents instantiation. */ 060 private TextUtil() {} 061 062 /** 063 * java.net.URLEncoder.encode() method in JDK < 1.4 is buggy. This duplicates its functionality. 064 * 065 * @param rs the string to encode 066 * @return the URL-encoded string 067 */ 068 static String urlEncode( final byte[] rs ) { 069 final StringBuilder result = new StringBuilder( rs.length * 2 ); 070 071 // Does the URLEncoding. We could use the java.net one, but it does not eat byte[]s. 072 for( final byte r : rs ) { 073 final char c = ( char )r; 074 switch( c ) { 075 case '_': 076 case '.': 077 case '*': 078 case '-': 079 case '/': 080 result.append( c ); 081 break; 082 case ' ': 083 result.append( '+' ); 084 break; 085 default: 086 if( ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' ) || ( c >= '0' && c <= '9' ) ) { 087 result.append( c ); 088 } else { 089 result.append( '%' ); 090 result.append( HEX_DIGITS.charAt( ( c & 0xF0 ) >> 4 ) ); 091 result.append( HEX_DIGITS.charAt( c & 0x0F ) ); 092 } 093 } 094 } 095 096 return result.toString(); 097 } 098 099 /** 100 * URL encoder does not handle all characters correctly. See <A HREF="http://developer.java.sun.com/developer/bugParade/bugs/4257115.html"> 101 * Bug parade, bug #4257115</A> for more information. 102 * <P> 103 * Thanks to CJB for this fix. 104 * 105 * @param bytes The byte array containing the bytes of the string 106 * @param encoding The encoding in which the string should be interpreted 107 * @return A decoded String 108 * 109 * @throws IllegalArgumentException If the byte array is not a valid string. 110 */ 111 static String urlDecode( final byte[] bytes, final String encoding ) throws IllegalArgumentException { 112 if( bytes == null ) { 113 return null; 114 } 115 116 final byte[] decodeBytes = new byte[ bytes.length ]; 117 int decodedByteCount = 0; 118 119 try { 120 for( int count = 0; count < bytes.length; count++ ) { 121 switch( bytes[count] ) { 122 case '+': 123 decodeBytes[decodedByteCount++] = ( byte ) ' '; 124 break ; 125 126 case '%': 127 decodeBytes[decodedByteCount++] = ( byte )( ( HEX_DIGITS.indexOf( bytes[++count] ) << 4 ) + 128 ( HEX_DIGITS.indexOf( bytes[++count] ) ) ); 129 break ; 130 131 default: 132 decodeBytes[decodedByteCount++] = bytes[count] ; 133 } 134 } 135 136 } catch( final IndexOutOfBoundsException ae ) { 137 throw new IllegalArgumentException( "Malformed UTF-8 string?" ); 138 } 139 140 return new String(decodeBytes, 0, decodedByteCount, Charset.forName( encoding ) ); 141 } 142 143 /** 144 * As java.net.URLEncoder class, but this does it in UTF8 character set. 145 * 146 * @param text The text to decode 147 * @return An URLEncoded string. 148 */ 149 public static String urlEncodeUTF8( final String text ) { 150 // If text is null, just return an empty string 151 if ( text == null ) { 152 return ""; 153 } 154 155 return urlEncode( text.getBytes( StandardCharsets.UTF_8 ) ); 156 } 157 158 /** 159 * As java.net.URLDecoder class, but for UTF-8 strings. null is a safe value and returns null. 160 * 161 * @param utf8 The UTF-8 encoded string 162 * @return A plain, normal string. 163 */ 164 public static String urlDecodeUTF8( final String utf8 ) { 165 if( utf8 == null ) { 166 return null; 167 } 168 169 return urlDecode( utf8.getBytes( StandardCharsets.ISO_8859_1 ), StandardCharsets.UTF_8.toString() ); 170 } 171 172 /** 173 * Provides encoded version of string depending on encoding. Encoding may be UTF-8 or ISO-8859-1 (default). 174 * 175 * <p>This implementation is the same as in FileSystemProvider.mangleName(). 176 * 177 * @param data A string to encode 178 * @param encoding The encoding in which to encode 179 * @return A URL encoded string. 180 */ 181 public static String urlEncode( final String data, final String encoding ) { 182 // Presumably, the same caveats apply as in FileSystemProvider. Don't see why it would be horribly kludgy, though. 183 if( StandardCharsets.UTF_8.toString().equals( encoding ) ) { 184 return urlEncodeUTF8( data ); 185 } 186 187 return urlEncode( data.getBytes( Charset.forName( encoding ) ) ); 188 } 189 190 /** 191 * Provides decoded version of string depending on encoding. Encoding may be UTF-8 or ISO-8859-1 (default). 192 * 193 * <p>This implementation is the same as in FileSystemProvider.unmangleName(). 194 * 195 * @param data The URL-encoded string to decode 196 * @param encoding The encoding to use 197 * @return A decoded string. 198 * @throws IllegalArgumentException If the data cannot be decoded. 199 */ 200 public static String urlDecode( final String data, final String encoding ) throws IllegalArgumentException { 201 // Presumably, the same caveats apply as in FileSystemProvider. Don't see why it would be horribly kludgy, though. 202 if( StandardCharsets.UTF_8.name().equals( encoding ) ) { 203 return urlDecodeUTF8( data ); 204 } 205 206 return urlDecode( data.getBytes( Charset.forName( encoding ) ), encoding ); 207 } 208 209 /** 210 * Replaces the relevant entities inside the String. All & >, <, and " are replaced by their respective names. 211 * 212 * @since 1.6.1 213 * @param src The source string. 214 * @return The encoded string. 215 */ 216 public static String replaceEntities( String src ) { 217 src = replaceString( src, "&", "&" ); 218 src = replaceString( src, "<", "<" ); 219 src = replaceString( src, ">", ">" ); 220 src = replaceString( src, "\"", """ ); 221 222 return src; 223 } 224 225 /** 226 * Replaces a string with another string. 227 * 228 * @param orig Original string. Null is safe. 229 * @param src The string to find. 230 * @param dest The string to replace <I>src</I> with. 231 * @return A string with the replacement done. 232 */ 233 public static String replaceString( final String orig, final String src, final String dest ) { 234 if ( orig == null ) { 235 return null; 236 } 237 if ( src == null || dest == null ) { 238 throw new NullPointerException(); 239 } 240 if ( src.isEmpty() ) { 241 return orig; 242 } 243 244 final StringBuilder res = new StringBuilder( orig.length() + 20 ); // Pure guesswork 245 int start; 246 int end = 0; 247 int last = 0; 248 249 while ( ( start = orig.indexOf( src,end ) ) != -1 ) { 250 res.append( orig, last, start ); 251 res.append( dest ); 252 end = start + src.length(); 253 last = start + src.length(); 254 } 255 res.append( orig.substring( end ) ); 256 257 return res.toString(); 258 } 259 260 /** 261 * Replaces a part of a string with a new String. 262 * 263 * @param start Where in the original string the replacing should start. 264 * @param end Where the replacing should end. 265 * @param orig Original string. Null is safe. 266 * @param text The new text to insert into the string. 267 * @return The string with the orig replaced with text. 268 */ 269 public static String replaceString( final String orig, final int start, final int end, final String text ) { 270 if( orig == null ) { 271 return null; 272 } 273 274 final StringBuilder buf = new StringBuilder( orig ); 275 buf.replace( start, end, text ); 276 return buf.toString(); 277 } 278 279 /** 280 * Replaces a string with another string. Case-insensitive matching is used 281 * 282 * @param orig Original string. Null is safe. 283 * @param src The string to find. 284 * @param dest The string to replace <em>src</em> with. 285 * @return A string with all instances of src replaced with dest. 286 */ 287 public static String replaceStringCaseUnsensitive( final String orig, final String src, final String dest ) { 288 if( orig == null ) { 289 return null; 290 } 291 292 final StringBuilder res = new StringBuilder(); 293 int start; 294 int end = 0; 295 int last = 0; 296 297 final String origCaseUnsn = orig.toLowerCase(); 298 final String srcCaseUnsn = src.toLowerCase(); 299 while( ( start = origCaseUnsn.indexOf( srcCaseUnsn, end ) ) != -1 ) { 300 res.append( orig, last, start ); 301 res.append( dest ); 302 end = start + src.length(); 303 last = start + src.length(); 304 } 305 res.append( orig.substring( end ) ); 306 307 return res.toString(); 308 } 309 310 /** 311 * Parses an integer parameter, returning a default value if the value is null or a non-number. 312 * 313 * @param value The value to parse 314 * @param defvalue A default value in case the value is not a number 315 * @return The parsed value (or defvalue). 316 */ 317 public static int parseIntParameter( final String value, final int defvalue ) { 318 try { 319 return Integer.parseInt( value.trim() ); 320 } catch( final Exception e ) {} 321 322 return defvalue; 323 } 324 325 /** 326 * Gets an integer-valued property from a standard Properties list. 327 * 328 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists we use that value, 329 * if not we check an environment variable with that (almost) same name, almost meaning we replace dots with underscores. 330 * 331 * If the value does not exist, or is a non-integer, returns defVal. 332 * 333 * @since 2.1.48. 334 * @param props The property set to look through 335 * @param key The key to look for 336 * @param defVal If the property is not found or is a non-integer, returns this value. 337 * @return The property value as an integer (or defVal). 338 */ 339 public static int getIntegerProperty( final Properties props, final String key, final int defVal ) { 340 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 341 if( val == null ) { 342 val = props.getProperty( key ); 343 } 344 return parseIntParameter( val, defVal ); 345 } 346 347 /** 348 * Gets a boolean property from a standard Properties list. Returns the default value, in case the key has not been set. 349 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists 350 * we use that value, if not we check an environment variable with that (almost) same name, almost meaning we replace 351 * dots with underscores. 352 * <P> 353 * The possible values for the property are "true"/"false", "yes"/"no", or "on"/"off". Any value not recognized is always defined 354 * as "false". 355 * 356 * @param props A list of properties to search. 357 * @param key The property key. 358 * @param defval The default value to return. 359 * 360 * @return True, if the property "key" was set to "true", "on", or "yes". 361 * 362 * @since 2.0.11 363 */ 364 public static boolean getBooleanProperty( final Properties props, final String key, final boolean defval ) { 365 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 366 if( val == null ) { 367 val = props.getProperty( key ); 368 } 369 if( val == null ) { 370 return defval; 371 } 372 373 return isPositive( val ); 374 } 375 376 /** 377 * Fetches a String property from the set of Properties. This differs from Properties.getProperty() in a 378 * couple of key respects: First, property value is trim()med (so no extra whitespace back and front). 379 * 380 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists 381 * we use that value, if not we check an environment variable with that (almost) same name, almost meaning we replace 382 * dots with underscores. 383 * 384 * @param props The Properties to search through 385 * @param key The property key 386 * @param defval A default value to return, if the property does not exist. 387 * @return The property value. 388 * @since 2.1.151 389 */ 390 public static String getStringProperty( final Properties props, final String key, final String defval ) { 391 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 392 if( val == null ) { 393 val = props.getProperty( key ); 394 } 395 if( val == null ) { 396 return defval; 397 } 398 return val.trim(); 399 } 400 401 /** 402 * {@link #getStringProperty(Properties, String, String)} overload that handles deprecated keys, so that a key and its 403 * deprecated counterpart can coexist in a given version of JSPWiki. 404 * 405 * @param props The Properties to search through 406 * @param key The property key 407 * @param deprecatedKey the property key being superseeded by key 408 * @param defval A default value to return, if the property does not exist. 409 * @return The property value. 410 */ 411 public static String getStringProperty( final Properties props, final String key, final String deprecatedKey, final String defval ) { 412 final String val = getStringProperty( props, deprecatedKey, null ); 413 if( val != null ) { 414 LOG.warn( "{} is being deprecated and will be removed on a future version, please consider using {} instead " + 415 "in your jspwiki[-custom].properties file", deprecatedKey, key ); 416 return val; 417 } 418 return getStringProperty( props, key, defval ); 419 } 420 421 /** 422 * Throws an exception if a property is not found. 423 * 424 * @param props A set of properties to search the key in. 425 * @param key The key to look for. 426 * @return The required property 427 * 428 * @throws NoSuchElementException If the search key is not in the property set. 429 * @since 2.0.26 (on TextUtils, moved To WikiEngine on 2.11.0-M1 and back to TextUtils on 2.11.0-M6) 430 */ 431 public static String getRequiredProperty( final Properties props, final String key ) throws NoSuchElementException { 432 final String value = getStringProperty( props, key, null ); 433 if( value == null ) { 434 throw new NoSuchElementException( "Required property not found: " + key ); 435 } 436 return value; 437 } 438 439 /** 440 * {@link #getRequiredProperty(Properties, String)} overload that handles deprecated keys, so that a key and its 441 * deprecated counterpart can coexist in a given version of JSPWiki. 442 * 443 * @param props The Properties to search through 444 * @param key The property key 445 * @param deprecatedKey the property key being superseeded by key 446 * @return The property value. 447 */ 448 public static String getRequiredProperty( final Properties props, final String key, final String deprecatedKey ) throws NoSuchElementException { 449 final String value = getStringProperty( props, deprecatedKey, null ); 450 if( value == null ) { 451 return getRequiredProperty( props, key ); 452 } 453 LOG.warn( "{} is being deprecated and will be removed on a future version, please consider using {} instead " + 454 "in your jspwiki[-custom].properties file", deprecatedKey, key ); 455 return value; 456 } 457 458 /** 459 * Fetches a file path property from the set of Properties. 460 * 461 * Before inspecting the props, we first check if there is a Java System Property with the same name, if it exists we use that value, 462 * if not we check an environment variable with that (almost) same name, almost meaning we replace dots with underscores. 463 * 464 * If the implementation fails to create the canonical path it just returns the original value of the property which is a bit doggy. 465 * 466 * @param props The Properties to search through 467 * @param key The property key 468 * @param defval A default value to return, if the property does not exist. 469 * @return the canonical path of the file or directory being referenced 470 * @since 2.10.1 471 */ 472 public static String getCanonicalFilePathProperty( final Properties props, final String key, final String defval ) { 473 String val = System.getProperties().getProperty( key, System.getenv( StringUtils.replace( key,".","_" ) ) ); 474 if( val == null ) { 475 val = props.getProperty( key ); 476 } 477 478 if( val == null ) { 479 val = defval; 480 } 481 482 String result; 483 try { 484 result = new File( new File( val.trim() ).getCanonicalPath() ).getAbsolutePath(); 485 } catch( final IOException e ) { 486 result = val.trim(); 487 } 488 return result; 489 } 490 491 /** 492 * Returns true, if the string "val" denotes a positive string. Allowed values are "yes", "on", and "true". 493 * Comparison is case-insignificant. Null values are safe. 494 * 495 * @param val Value to check. 496 * @return True, if val is "true", "on", or "yes"; otherwise false. 497 * 498 * @since 2.0.26 499 */ 500 public static boolean isPositive( String val ) { 501 if( val == null ) { 502 return false; 503 } 504 val = val.trim(); 505 return val.equalsIgnoreCase( "true" ) 506 || val.equalsIgnoreCase( "on" ) 507 || val.equalsIgnoreCase( "yes" ); 508 } 509 510 /** 511 * Makes sure that the POSTed data is conforms to certain rules. These rules are: 512 * <UL> 513 * <LI>The data always ends with a newline (some browsers, such as NS4.x series, does not send a newline at 514 * the end, which makes the diffs a bit strange sometimes. 515 * <LI>The CR/LF/CRLF mess is normalized to plain CRLF. 516 * </UL> 517 * 518 * The reason why we're using CRLF is that most browser already return CRLF since that is the closest thing to an HTTP standard. 519 * 520 * @param postData The data to normalize 521 * @return Normalized data 522 */ 523 public static String normalizePostData( final String postData ) { 524 final StringBuilder sb = new StringBuilder(); 525 for( int i = 0; i < postData.length(); i++ ) { 526 switch( postData.charAt(i) ) { 527 case 0x0a: // LF, UNIX 528 sb.append( "\r\n" ); 529 break; 530 531 case 0x0d: // CR, either Mac or MSDOS 532 sb.append( "\r\n" ); 533 // If it's MSDOS, skip the LF so that we don't add it again. 534 if( i < postData.length() - 1 && postData.charAt( i + 1 ) == 0x0a ) { 535 i++; 536 } 537 break; 538 539 default: 540 sb.append( postData.charAt( i ) ); 541 break; 542 } 543 } 544 545 if( sb.length() < 2 || !sb.substring( sb.length()-2 ).equals( "\r\n" ) ) { 546 sb.append( "\r\n" ); 547 } 548 549 return sb.toString(); 550 } 551 552 private static final int EOI = 0; 553 private static final int LOWER = 1; 554 private static final int UPPER = 2; 555 private static final int DIGIT = 3; 556 private static final int OTHER = 4; 557 private static final Random RANDOM = new SecureRandom(); 558 559 private static int getCharKind( final int c ) { 560 if( c == -1 ) { 561 return EOI; 562 } 563 564 final char ch = ( char )c; 565 566 if( Character.isLowerCase( ch ) ) { 567 return LOWER; 568 } else if( Character.isUpperCase( ch ) ) { 569 return UPPER; 570 } else if( Character.isDigit( ch ) ) { 571 return DIGIT; 572 } else { 573 return OTHER; 574 } 575 } 576 577 /** 578 * Adds spaces in suitable locations of the input string. This is used to transform a WikiName into a more readable format. 579 * 580 * @param s String to be beautified. 581 * @return A beautified string. 582 */ 583 public static String beautifyString( final String s ) { 584 return beautifyString( s, " " ); 585 } 586 587 /** 588 * Adds spaces in suitable locations of the input string. This is used to transform a WikiName into a more readable format. 589 * 590 * @param s String to be beautified. 591 * @param space Use this string for the space character. 592 * @return A beautified string. 593 * @since 2.1.127 594 */ 595 public static String beautifyString( final String s, final String space ) { 596 if( s == null || s.isEmpty() ) { 597 return ""; 598 } 599 600 final StringBuilder result = new StringBuilder(); 601 602 int cur = s.charAt( 0 ); 603 int curKind = getCharKind( cur ); 604 605 int prevKind = LOWER; 606 int nextKind; 607 int next; 608 int nextPos = 1; 609 610 while( curKind != EOI ) { 611 next = ( nextPos < s.length() ) ? s.charAt( nextPos++ ) : -1; 612 nextKind = getCharKind( next ); 613 614 if( ( prevKind == UPPER ) && ( curKind == UPPER ) && ( nextKind == LOWER ) ) { 615 result.append( space ); 616 result.append( ( char ) cur ); 617 } else { 618 result.append((char) cur ); 619 if( ( ( curKind == UPPER ) && (nextKind == DIGIT) ) 620 || ( ( curKind == LOWER ) && ( ( nextKind == DIGIT ) || ( nextKind == UPPER ) ) ) 621 || ( ( curKind == DIGIT ) && ( ( nextKind == UPPER ) || ( nextKind == LOWER ) ) ) ) { 622 result.append( space ); 623 } 624 } 625 prevKind = curKind; 626 cur = next; 627 curKind = nextKind; 628 } 629 630 return result.toString(); 631 } 632 633 /** 634 * Cleans a Wiki name based on a list of characters. Also, any multiple whitespace is collapsed into a single space, and any 635 * leading or trailing space is removed. 636 * 637 * @param text text to be cleared. Null is safe, and causes this to return null. 638 * @param allowedChars Characters which are allowed in the string. 639 * @return A cleaned text. 640 * 641 * @since 2.6 642 */ 643 public static String cleanString( String text, final String allowedChars ) { 644 if( text == null ) { 645 return null; 646 } 647 648 text = text.trim(); 649 final StringBuilder clean = new StringBuilder( text.length() ); 650 651 // Remove non-alphanumeric characters that should not be put inside WikiNames. Note that all valid Unicode letters are 652 // considered okay for WikiNames. It is the problem of the WikiPageProvider to take care of actually storing that information. 653 // 654 // Also capitalize things, if necessary. 655 656 boolean isWord = true; // If true, we've just crossed a word boundary 657 boolean wasSpace = false; 658 for( int i = 0; i < text.length(); i++ ) { 659 char ch = text.charAt( i ); 660 661 // Cleans away repetitive whitespace and only uses the first one. 662 if( Character.isWhitespace( ch ) ) { 663 if( wasSpace ) { 664 continue; 665 } 666 667 wasSpace = true; 668 } else { 669 wasSpace = false; 670 } 671 672 // Check if it is allowed to use this char, and capitalize, if necessary. 673 if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf( ch ) != -1 ) { 674 // Is a letter 675 if( isWord ) { 676 ch = Character.toUpperCase( ch ); 677 } 678 clean.append( ch ); 679 isWord = false; 680 } else { 681 isWord = true; 682 } 683 } 684 685 return clean.toString(); 686 } 687 688 /** 689 * Creates a Properties object based on an array which contains alternatively a key and a value. It is useful 690 * for generating default mappings. For example: 691 * <pre> 692 * String[] properties = { "jspwiki.property1", "value1", "jspwiki.property2", "value2 }; 693 * Properties props = TextUtil.createPropertes( values ); 694 * System.out.println( props.getProperty("jspwiki.property1") ); 695 * </pre> 696 * would output "value1". 697 * 698 * @param values Alternating key and value pairs. 699 * @return Property object 700 * @see java.util.Properties 701 * @throws IllegalArgumentException if the property array is missing a value for a key. 702 * @since 2.2. 703 */ 704 public static Properties createProperties( final String[] values ) throws IllegalArgumentException { 705 if( values.length % 2 != 0 ) { 706 throw new IllegalArgumentException( "One value is missing."); 707 } 708 709 final Properties props = new Properties(); 710 for( int i = 0; i < values.length; i += 2 ) { 711 props.setProperty( values[i], values[i + 1] ); 712 } 713 714 return props; 715 } 716 717 /** 718 * Counts the number of sections (separated with "----") from the page. 719 * 720 * @param pagedata The WikiText to parse. 721 * @return int Number of counted sections. 722 * @since 2.1.86. 723 */ 724 public static int countSections( final String pagedata ) { 725 int tags = 0; 726 int start = 0; 727 728 while( ( start = pagedata.indexOf( "----", start ) ) != -1 ) { 729 tags++; 730 start += 4; // Skip this "----" 731 } 732 733 // The first section does not get the "----" 734 return !pagedata.isEmpty() ? tags + 1 : 0; 735 } 736 737 /** 738 * Gets the given section (separated with "----") from the page text. Note that the first section is always #1. If a page has no 739 * section markers, then there is only a single section, #1. 740 * 741 * @param pagedata WikiText to parse. 742 * @param section Which section to get. 743 * @return String The section. 744 * @throws IllegalArgumentException If the page does not contain this many sections. 745 * @since 2.1.86. 746 */ 747 public static String getSection( final String pagedata, final int section ) throws IllegalArgumentException { 748 int tags = 0; 749 int start = 0; 750 int previous = 0; 751 752 while( ( start = pagedata.indexOf( "----", start ) ) != -1 ) { 753 if( ++tags == section ) { 754 return pagedata.substring( previous, start ); 755 } 756 757 start += 4; // Skip this "----" 758 // allow additional dashes, treat it as if it was a correct 4-dash 759 while (start < pagedata.length() && pagedata.charAt( start ) == '-') { 760 start++; 761 } 762 763 previous = start; 764 } 765 766 if( ++tags == section ) { 767 return pagedata.substring( previous ); 768 } 769 770 throw new IllegalArgumentException( "There is no section no. " + section + " on the page." ); 771 } 772 773 /** 774 * A simple routine which just repeates the arguments. This is useful for creating something like a line or something. 775 * 776 * @param what String to repeat 777 * @param times How many times to repeat the string. 778 * @return Guess what? 779 * @since 2.1.98. 780 */ 781 public static String repeatString( final String what, final int times ) { 782 783 return IntStream.range(0, times).mapToObj(i -> what).collect(Collectors.joining()); 784 } 785 786 /** 787 * Converts a string from the Unicode representation into something that can be embedded in a java 788 * properties file. All references outside the ASCII range are replaced with \\uXXXX. 789 * 790 * @param s The string to convert 791 * @return the ASCII string 792 */ 793 public static String native2Ascii( final String s ) { 794 final StringBuilder sb = new StringBuilder(); 795 for( int i = 0; i < s.length(); i++ ) { 796 final char aChar = s.charAt(i); 797 if( ( aChar < 0x0020 ) || ( aChar > 0x007e ) ) { 798 sb.append( '\\'); 799 sb.append( 'u'); 800 sb.append( toHex( ( aChar >> 12 ) & 0xF ) ); 801 sb.append( toHex( ( aChar >> 8 ) & 0xF ) ); 802 sb.append( toHex( ( aChar >> 4 ) & 0xF ) ); 803 sb.append( toHex( aChar & 0xF ) ); 804 } else { 805 sb.append( aChar ); 806 } 807 } 808 return sb.toString(); 809 } 810 811 private static char toHex( final int nibble ) { 812 final char[] hexDigit = { 813 '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' 814 }; 815 return hexDigit[ nibble & 0xF ]; 816 } 817 818 /** 819 * Generates a hexadecimal string from an array of bytes. For example, if the array contains 820 * { 0x01, 0x02, 0x3E }, the resulting string will be "01023E". 821 * 822 * @param bytes A Byte array 823 * @return A String representation 824 * @since 2.3.87 825 */ 826 public static String toHexString( final byte[] bytes ) { 827 final StringBuilder sb = new StringBuilder( bytes.length * 2 ); 828 for( final byte aByte : bytes ) { 829 sb.append( toHex( aByte >> 4 ) ); 830 sb.append( toHex( aByte ) ); 831 } 832 833 return sb.toString(); 834 } 835 836 /** 837 * Returns true, if the argument contains a number, otherwise false. In a quick test this is roughly the same 838 * speed as Integer.parseInt() if the argument is a number, and roughly ten times the speed, if the argument 839 * is NOT a number. 840 * 841 * @since 2.4 842 * @param s String to check 843 * @return True, if s represents a number. False otherwise. 844 */ 845 public static boolean isNumber( String s ) { 846 if( s == null ) { 847 return false; 848 } 849 850 if( s.length() > 1 && s.charAt(0) == '-' ) { 851 s = s.substring( 1 ); 852 } 853 854 for( int i = 0; i < s.length(); i++ ) { 855 if( !Character.isDigit( s.charAt( i ) ) ) { 856 return false; 857 } 858 } 859 860 return true; 861 } 862 863 /** 864 * Generate a random String suitable for use as a temporary password. 865 * 866 * @return String suitable for use as a temporary password 867 * @since 2.4 868 */ 869 public static String generateRandomPassword() { 870 return IntStream.range(0, PASSWORD_LENGTH).map(i -> (int) (RANDOM.nextDouble() * PWD_BASE.length())).mapToObj(index -> String.valueOf(PWD_BASE.charAt(index))).collect(Collectors.joining()); 871 } 872 873}