package org.unix4j.util;

import java.util.ArrayList;
import java.util.List;

import org.unix4j.line.Line;
import org.unix4j.line.SingleCharSequenceLine;

/**
 * Utility class with static methods for strings: fixed-width formatting,
 * line splitting, whitespace/newline trimming helpers and case-insensitive
 * searching.
 */
public class StringUtil {

	/**
	 * Returns the given {@code value} as a string of fixed length {@code size}
	 * padding or truncating the value if necessary.
	 * <p>
	 * If {@code alignLeft==true}, the given {@code value} is left-aligned
	 * appending the given {@code filler} character to make up the fixed
	 * length. If the given {@code value} turns out to be longer than
	 * {@code size} when transformed into a string, it is truncated from the
	 * right.
	 * <p>
	 * If {@code alignLeft==false}, the given {@code value} is right-aligned
	 * and {@code filler} characters are added from the left if necessary. If
	 * {@code value} is longer than {@code size} it is truncated from the left.
	 * <p>
	 * Examples with {@code size=3}:
	 * <ul>
	 * <li>alignLeft=true, filler=' ', value=89 --&gt; "89 "</li>
	 * <li>alignLeft=true, filler=' ', value=1234 --&gt; "123"</li>
	 * <li>alignLeft=false, filler=' ', value=89 --&gt; " 89"</li>
	 * <li>alignLeft=false, filler='0', value=89 --&gt; "089"</li>
	 * <li>alignLeft=false, filler=' ', value=1234 --&gt; "234"</li>
	 * </ul>
	 *
	 * @param size
	 *            the fixed size of the returned string
	 * @param alignLeft
	 *            true if {@code value} should be left-aligned
	 * @param filler
	 *            the filler character if {@code value} is shorter than
	 *            {@code size}
	 * @param value
	 *            the value to format
	 * @return the value as a fixed size string, padded or truncated if
	 *         necessary
	 */
	public static String fixSizeString(int size, boolean alignLeft,
			char filler, long value) {
		return fixSizeString(size, alignLeft, filler, String.valueOf(value));
	}

	/**
	 * Returns the given string {@code s} as a string of fixed length
	 * {@code size} padding the string with spaces or truncating it if
	 * necessary.
	 * <p>
	 * If {@code alignLeft==true}, the given string {@code s} is left-aligned
	 * appending spaces to make up the fixed length. If {@code s} turns out to
	 * be longer than {@code size} it is truncated from the right.
	 * <p>
	 * If {@code alignLeft==false}, the given string {@code s} is
	 * right-aligned and space characters are added from the left if
	 * necessary. If {@code s} is longer than {@code size} it is truncated
	 * from the left.
	 * <p>
	 * Examples with {@code size=3}:
	 * <ul>
	 * <li>alignLeft=true, s="XY" --&gt; "XY "</li>
	 * <li>alignLeft=true, s="Abcd" --&gt; "Abc"</li>
	 * <li>alignLeft=false, s="XY" --&gt; " XY"</li>
	 * <li>alignLeft=false, s="Abcd" --&gt; "bcd"</li>
	 * </ul>
	 *
	 * @param size
	 *            the fixed size of the returned string
	 * @param alignLeft
	 *            true if {@code s} should be left-aligned
	 * @param s
	 *            the string to format
	 * @return the string {@code s} as a fixed size string, padded or truncated
	 *         if necessary
	 */
	public static String fixSizeString(int size, boolean alignLeft,
			String s) {
		return fixSizeString(size, alignLeft, ' ', s);
	}

	/**
	 * Returns the given string {@code s} as a string of fixed length
	 * {@code size} padding or truncating the string if necessary.
	 * <p>
	 * If {@code alignLeft==true}, the given string {@code s} is left-aligned
	 * appending the given {@code filler} character to make up the fixed
	 * length. If {@code s} turns out to be longer than {@code size} it is
	 * truncated from the right.
	 * <p>
	 * If {@code alignLeft==false}, the given string {@code s} is
	 * right-aligned and {@code filler} characters are added from the left if
	 * necessary. If {@code s} is longer than {@code size} it is truncated
	 * from the left.
	 * <p>
	 * Examples with {@code size=3}:
	 * <ul>
	 * <li>alignLeft=true, filler=' ', s="XY" --&gt; "XY "</li>
	 * <li>alignLeft=true, filler=' ', s="Abcd" --&gt; "Abc"</li>
	 * <li>alignLeft=false, filler=' ', s="XY" --&gt; " XY"</li>
	 * <li>alignLeft=false, filler='0', s="12" --&gt; "012"</li>
	 * <li>alignLeft=false, filler=' ', s="Abcd" --&gt; "bcd"</li>
	 * </ul>
	 *
	 * @param size
	 *            the fixed size of the returned string
	 * @param alignLeft
	 *            true if {@code s} should be left-aligned
	 * @param filler
	 *            the filler character if {@code s} is shorter than {@code size}
	 * @param s
	 *            the string to format
	 * @return the string {@code s} as a fixed size string, padded or truncated
	 *         if necessary
	 * @throws StringIndexOutOfBoundsException
	 *             if {@code size} is negative
	 */
	public static String fixSizeString(int size, boolean alignLeft,
			char filler, String s) {
		final int length = s.length();
		if (length >= size) {
			// Too long (or exact fit): keep the leading or the trailing chars.
			return alignLeft ? s.substring(0, size) : s.substring(length - size);
		}
		final StringBuilder sb = new StringBuilder(size);
		if (alignLeft) {
			sb.append(s);
		}
		for (int i = length; i < size; i++) {
			sb.append(filler);
		}
		if (!alignLeft) {
			sb.append(s);
		}
		return sb.toString();
	}

	/**
	 * Splits the given string into lines and returns each line as a separate
	 * {@link Line} in the result list. The result list will contain at least
	 * one entry unless the string is empty.
	 * <p>
	 * A trailing newline after the last line is ignored, meaning that no empty
	 * line is appended as separate entry if the string ends with a newline.
	 * However multiple trailing newlines will still lead to empty lines at the
	 * end of the list.
	 * <p>
	 * Note that all line ending characters are accepted to split lines, no
	 * matter what operating system this code is hosted on. More precisely, the
	 * {@link Line#LF LF} and {@link Line#CR CR} characters are recognized as
	 * line ending characters, either as single character or as a pair
	 * {@code CR+LF} or {@code LF+CR}.
	 *
	 * @param s
	 *            the string to split
	 * @return a list with the lines found in {@code s}
	 */
	public static List<Line> splitLines(String s) {
		final List<Line> lines = new ArrayList<Line>();
		final int length = s.length();
		int start = 0;
		int index = 0;
		while (index < length) {
			final char ch = s.charAt(index);
			if (ch == Line.LF || ch == Line.CR) {
				final int lineEndingStart = index;
				index++;
				if (index < length) {
					// A *different* line ending char directly following the
					// first one forms a two-character ending (CR+LF or LF+CR);
					// the same char repeated starts a new, empty line instead.
					final char ch2 = s.charAt(index);
					if (ch2 != ch && (ch2 == Line.LF || ch2 == Line.CR)) {
						index++;
					}
				}
				lines.add(new SingleCharSequenceLine(s, start,
						lineEndingStart - start, index - lineEndingStart));
				start = index;
			} else {
				index++;
			}
		}
		if (start < length) {
			// Remaining characters form a last line without line ending.
			lines.add(new SingleCharSequenceLine(s, start, length - start, 0));
		}
		return lines;
	}

	/**
	 * Finds and returns the start of the given sequence after trimming
	 * whitespace characters from the left. Whitespace characters are defined
	 * by {@link Character#isWhitespace(char)}.
	 *
	 * @param s
	 *            the character sequence
	 * @return the index containing the first non-whitespace character, or the
	 *         length of the character sequence if all characters are blank
	 */
	public static int findStartTrimWhitespace(CharSequence s) {
		return findStartTrimWhitespace(s, 0);
	}

	/**
	 * Finds and returns the start of the given sequence after trimming
	 * whitespace characters from the left, starting at the given {@code start}
	 * index. Whitespace characters are defined by
	 * {@link Character#isWhitespace(char)}.
	 *
	 * @param s
	 *            the character sequence
	 * @param start
	 *            the first index to consider in the char sequence
	 * @return the index containing the first non-whitespace character at or
	 *         after {@code start}, or the length of the character sequence if
	 *         all considered characters are blank
	 */
	public static int findStartTrimWhitespace(CharSequence s, int start) {
		final int len = s.length();
		for (int i = start; i < len; i++) {
			if (!Character.isWhitespace(s.charAt(i))) {
				return i;
			}
		}
		return len;
	}

	/**
	 * Finds and returns the end of the given character sequence after trimming
	 * whitespace characters from the right. Whitespace characters are defined
	 * by {@link Character#isWhitespace(char)}.
	 *
	 * @param s
	 *            the character sequence
	 * @return the index after the last non-whitespace character, or zero if all
	 *         characters are blank
	 */
	public static int findEndTrimWhitespace(CharSequence s) {
		for (int i = s.length(); i > 0; i--) {
			if (!Character.isWhitespace(s.charAt(i - 1))) {
				return i;
			}
		}
		return 0;
	}

	/**
	 * Finds and returns the start of the given sequence after trimming newline
	 * characters from the left. The following character sequences are treated
	 * as newline characters: "\n", "\r\n".
	 *
	 * @param s
	 *            the character sequence
	 * @return the index containing the first character that is not part of a
	 *         newline sequence, or the length of the character sequence if all
	 *         characters are newline chars
	 */
	public static int findStartTrimNewlineChars(CharSequence s) {
		return findStartTrimNewlineChars(s, 0);
	}

	/**
	 * Finds and returns the start of the given sequence after trimming newline
	 * characters from the left, starting at the given {@code start} index.
	 * The following character sequences are treated as newline characters:
	 * "\n", "\r\n".
	 *
	 * @param s
	 *            the character sequence
	 * @param start
	 *            the first index to consider in the char sequence
	 * @return the index containing the first character that is not part of a
	 *         newline sequence, or the length of the character sequence if all
	 *         considered characters are newline chars
	 */
	public static int findStartTrimNewlineChars(CharSequence s, int start) {
		final int len = s.length();
		int i = start;
		while (i < len) {
			final char ch = s.charAt(i);
			if (ch == '\n') {
				i++;
			} else if (ch == '\r' && i + 1 < len && s.charAt(i + 1) == '\n') {
				i += 2;// skip both chars, it was "\r\n"
			} else {
				// also reached for a lone '\r', which is not a newline here
				return i;
			}
		}
		return len;
	}

	/**
	 * Finds and returns the end of the given character sequence after trimming
	 * newline characters from the right. The following character sequences are
	 * treated as newline characters: "\n", "\r\n".
	 *
	 * @param s
	 *            the character sequence
	 * @return the index after the last character that is not part of a newline
	 *         sequence, or zero if all characters are newline chars
	 */
	public static int findEndTrimNewlineChars(CharSequence s) {
		int end = s.length();
		while (end > 0) {
			if (s.charAt(end - 1) != '\n') {
				return end;
			}
			end--;
			// a '\r' directly before the '\n' belongs to the same newline
			if (end > 0 && s.charAt(end - 1) == '\r') {
				end--;
			}
		}
		return 0;
	}

	/**
	 * Finds and returns the index of the first whitespace character in the
	 * given sequence, or the length of the sequence if no whitespace is found.
	 * Whitespace characters are defined by
	 * {@link Character#isWhitespace(char)}.
	 *
	 * @param s
	 *            the character sequence
	 * @return the index containing the first whitespace character, or the
	 *         length of the character sequence if it contains no whitespace
	 */
	public static int findWhitespace(CharSequence s) {
		return findWhitespace(s, 0);
	}

	/**
	 * Finds and returns the index of the first whitespace character in the
	 * given sequence at or after {@code start}. Returns the length of the
	 * sequence if no whitespace is found. Whitespace characters are defined by
	 * {@link Character#isWhitespace(char)}.
	 *
	 * @param s
	 *            the character sequence
	 * @param start
	 *            the first index to consider in the char sequence
	 * @return the index containing the first whitespace character at or after
	 *         {@code start}, or the length of the character sequence if no
	 *         whitespace is found
	 */
	public static int findWhitespace(CharSequence s, int start) {
		final int len = s.length();
		for (int i = start; i < len; i++) {
			if (Character.isWhitespace(s.charAt(i))) {
				return i;
			}
		}
		return len;
	}

	/**
	 * Returns true if and only if the string {@code source} contains the
	 * specified {@code target} string performing case insensitive string
	 * comparison. An empty {@code target} is contained in every string.
	 *
	 * @param source
	 *            the characters being searched.
	 * @param target
	 *            the characters being searched for.
	 * @return true if {@code source} contains {@code target} ignoring the
	 *         case, false otherwise
	 * @throws NullPointerException
	 *             if {@code source} or {@code target} is <code>null</code>
	 */
	public static boolean containsIgnoreCase(String source, String target) {
		return 0 <= indexOfIgnoreCase(source, target);
	}

	/**
	 * Tests if the string {@code s} starts with the specified prefix
	 * performing case insensitive string comparison.
	 *
	 * @param s
	 *            the string to search
	 * @param prefix
	 *            the prefix.
	 * @return <code>true</code> if the character sequence represented by
	 *         {@code prefix} is a prefix of the character sequence represented
	 *         by the string {@code s}, ignoring the case; <code>false</code>
	 *         otherwise. Note also that <code>true</code> will be returned if
	 *         {@code prefix} is an empty string or is equal to {@code s} when
	 *         ignoring the case.
	 * @throws NullPointerException
	 *             if {@code s} or {@code prefix} is <code>null</code>
	 */
	public static boolean startsWithIgnoreCase(String s, String prefix) {
		// maxIndex=0: only a match at the very start is of interest
		return 0 == indexOfIgnoreCase(s, prefix, 0);
	}

	/**
	 * Returns the index within the source string of the first occurrence of the
	 * specified target substring performing case insensitive string comparison.
	 *
	 * <p>
	 * The returned index is the smallest value <i>k</i> for which: <blockquote>
	 *
	 * <pre>
	 * startsWithIgnoreCase(source.substring(<i>k</i>), target)
	 * </pre>
	 *
	 * </blockquote> If no such value of <i>k</i> exists, then {@code -1} is
	 * returned. As with {@code String.indexOf(..)}, an empty {@code target} is
	 * found at index 0.
	 *
	 * @param source
	 *            the characters being searched.
	 * @param target
	 *            the characters being searched for.
	 * @return the index of the first occurrence of the specified substring
	 *         (ignoring the case), or {@code -1} if there is no such
	 *         occurrence.
	 * @throws NullPointerException
	 *             if {@code source} or {@code target} is <code>null</code>
	 */
	public static int indexOfIgnoreCase(String source, String target) {
		return indexOfIgnoreCase(source, target, Integer.MAX_VALUE);
	}

	/**
	 * Case-insensitive search for {@code target} in {@code source} that only
	 * considers matches starting at an index not greater than
	 * {@code maxIndex}.
	 *
	 * @param source
	 *            the characters being searched.
	 * @param target
	 *            the characters being searched for.
	 * @param maxIndex
	 *            the maximum index to return (for instance 0 if only the start
	 *            of the string is of interest)
	 * @return the index of the first case-insensitive occurrence of
	 *         {@code target} starting at or before {@code maxIndex}, or
	 *         {@code -1} if there is no such occurrence
	 * @throws IllegalArgumentException
	 *             if {@code maxIndex} is negative
	 */
	private static int indexOfIgnoreCase(String source, String target,
			int maxIndex) {
		if (maxIndex < 0) {
			throw new IllegalArgumentException("maxIndex cannot be negative: "
					+ maxIndex);
		}
		final int sourceCount = source.length();
		final int targetCount = target.length();
		if (targetCount == 0) {
			// The empty string occurs at the start of every string; without
			// this guard target.charAt(0) would fail for an empty target.
			return 0;
		}
		// Last start index at which target still fits into source; negative
		// (and hence no iteration) if target is longer than source.
		final int lastStart = Math.min(maxIndex, sourceCount - targetCount);
		for (int start = 0; start <= lastStart; start++) {
			int matched = 0;
			while (matched < targetCount
					&& equalsIgnoreCase(source.charAt(start + matched),
							target.charAt(matched))) {
				matched++;
			}
			if (matched == targetCount) {
				/* Found whole string. */
				return start;
			}
		}
		return -1;
	}

	/**
	 * Returns true if the two characters are equal if case is ignored.
	 *
	 * @param ch1
	 *            the first character
	 * @param ch2
	 *            the second character
	 * @return true if both characters are the same according to
	 *         case-insensitive comparison
	 */
	public static boolean equalsIgnoreCase(char ch1, char ch2) {
		if (ch1 == ch2) {
			return true;
		}
		// If the characters differ, try converting both to uppercase
		final char upper1 = Character.toUpperCase(ch1);
		final char upper2 = Character.toUpperCase(ch2);
		if (upper1 == upper2) {
			return true;
		}
		// Unfortunately, conversion to uppercase does not work properly
		// for the Georgian alphabet, which has strange rules about case
		// conversion. So we need to make one last check before
		// exiting.
		return Character.toLowerCase(upper1) == Character.toLowerCase(upper2);
	}

	// no instances
	private StringUtil() {
		super();
	}

}