//$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/tags/2.1/src/org/deegree/framework/util/StringTools.java $
/*---------------- FILE HEADER ------------------------------------------

 This file is part of deegree.
 Copyright (C) 2001-2007 by:
 EXSE, Department of Geography, University of Bonn
 http://www.giub.uni-bonn.de/deegree/
 lat/lon GmbH
 http://www.lat-lon.de

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

 Contact:

 Andreas Poth
 lat/lon GmbH
 Aennchenstr. 19
 53115 Bonn
 Germany
 E-Mail: poth@lat-lon.de

 Prof. Dr. Klaus Greve
 Department of Geography
 University of Bonn
 Meckenheimer Allee 166
 53115 Bonn
 Germany
 E-Mail: greve@giub.uni-bonn.de


 ---------------------------------------------------------------------------*/
package org.deegree.framework.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.deegree.framework.xml.XMLFragment;
import org.deegree.framework.xml.XMLParsingException;
import org.deegree.framework.xml.XMLTools;
import org.deegree.ogcbase.CommonNamespaces;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

/**
 * A collection of static helper methods that extend the functionality of the standard Java
 * string class.
 *
 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
 * @author last edited by: $Author: apoth $
 *
 * @version $Revision: 7730 $, $Date: 2007-07-05 21:47:32 +0200 (Do, 05 Jul 2007) $
 */
public class StringTools {

    /**
     * Cache of substitution rules, filled lazily by initMap() and read by normalizeString().
     * Access to this map is not synchronized.
     *
     * key = locale language, e.g. "de"
     * value = map of substitution rules for this locale
     */
    private static Map<String, Map<String, String>> localeMap;

    /**
     * Concatenates the string representations of the passed objects.
     *
     * @param size
     *            estimated size of the target string (initial capacity of the builder)
     * @param objects
     *            objects to append in order; <code>null</code> entries are appended as "null"
     * @return the concatenated string
     */
    public static String concat( int size, Object... objects ) {
        StringBuilder sbb = new StringBuilder( size );
        for ( Object object : objects ) {
            sbb.append( object );
        }
        return sbb.toString();
    }

    /**
     * Replaces occurrences of a string fragment within a string by a new string.
     * <p>
     * If <code>all</code> is true and the replacement does not itself contain <code>from</code>,
     * the replacement is repeated until the result no longer contains <code>from</code>, so that
     * occurrences created by a previous pass are replaced as well (e.g. collapsing "aab" with
     * "ab" -> "b" yields "b").
     *
     * @param target
     *            is the original string
     * @param from
     *            is the string to be replaced; if it is empty, <code>target</code> is returned
     *            unchanged
     * @param to
     *            is the string which will be used as replacement
     * @param all
     *            if true, all occurrences of the string to be replaced will be replaced;
     *            otherwise only the first occurrence will be replaced
     * @return the string with the replacements applied, or <code>target</code> itself if
     *         <code>from</code> does not occur in it
     */
    public static String replace( String target, String from, String to, boolean all ) {

        // an empty search string matches at every index and would never let the loop end
        if ( from.length() == 0 ) {
            return target;
        }

        int start = target.indexOf( from );
        if ( start == -1 ) {
            return target;
        }

        if ( !all ) {
            return target.substring( 0, start ) + to + target.substring( start + from.length() );
        }

        // a replacement that contains the search string would re-create matches forever,
        // so in that case only a single pass is made
        boolean repeat = to.indexOf( from ) < 0;
        StringBuilder buffer = new StringBuilder( target.length() );
        do {
            buffer.setLength( 0 );
            int copyFrom = 0;
            while ( start != -1 ) {
                buffer.append( target, copyFrom, start );
                buffer.append( to );
                copyFrom = start + from.length();
                start = target.indexOf( from, copyFrom );
            }
            buffer.append( target, copyFrom, target.length() );
            target = buffer.toString();
            start = repeat ? target.indexOf( from ) : -1;
        } while ( start != -1 );

        return target;
    }

    /**
     * Parses a string and returns its trimmed tokens as array.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters that mark the end of a token (see {@link StringTokenizer})
     * @param deleteDoubles
     *            if true, tokens that are already contained in the result are dropped, so that
     *            there will only be one copy of them
     * @return the tokens; an empty array if <code>s</code> is <code>null</code> or empty; an
     *         array containing just <code>s</code> if it consists of delimiters only
     */
    public static String[] toArray( String s, String delimiter, boolean deleteDoubles ) {
        if ( s == null || s.length() == 0 ) {
            return new String[0];
        }

        StringTokenizer st = new StringTokenizer( s, delimiter );
        List<String> tokens = new ArrayList<String>( st.countTokens() );

        if ( st.hasMoreTokens() ) {
            while ( st.hasMoreTokens() ) {
                tokens.add( st.nextToken().trim() );
            }
        } else {
            // no token at all: the string itself becomes the only entry
            tokens.add( s );
        }

        String[] kw = tokens.toArray( new String[tokens.size()] );
        if ( deleteDoubles ) {
            kw = deleteDoubles( kw );
        }

        return kw;
    }

    /**
     * Parses a string and returns its trimmed tokens as typed List.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters that mark the end of a token (see {@link StringTokenizer})
     * @param deleteDoubles
     *            if true, tokens that are already contained in the result are dropped, so that
     *            there will only be one copy of them
     * @return the tokens; an empty list if <code>s</code> is <code>null</code> or empty
     */
    public static List<String> toList( String s, String delimiter, boolean deleteDoubles ) {
        if ( s == null || s.length() == 0 ) {
            return new ArrayList<String>();
        }

        StringTokenizer st = new StringTokenizer( s, delimiter );
        List<String> tokens = new ArrayList<String>( st.countTokens() );
        while ( st.hasMoreTokens() ) {
            String token = st.nextToken().trim();
            if ( !deleteDoubles || !tokens.contains( token ) ) {
                tokens.add( token );
            }
        }

        return tokens;
    }

    /**
     * Transforms a string array to one string. The array fields are separated by the submitted
     * delimiter.
     *
     * @param s
     *            string array to transform
     * @param delimiter
     *            character placed between two fields
     * @return the joined string
     */
    public static String arrayToString( String[] s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.length * 20 );

        for ( int i = 0; i < s.length; i++ ) {
            if ( i > 0 ) {
                res.append( delimiter );
            }
            res.append( s[i] );
        }

        return res.toString();
    }

    /**
     * Transforms a list to one string. The list entries are separated by the submitted
     * delimiter.
     *
     * @param s
     *            list to transform; each entry is appended via its string representation
     * @param delimiter
     *            character placed between two entries
     * @return the joined string
     */
    public static String listToString( List<?> s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.size() * 20 );

        boolean first = true;
        for ( Object entry : s ) {
            if ( !first ) {
                res.append( delimiter );
            }
            res.append( entry );
            first = false;
        }

        return res.toString();
    }

    /**
     * Transforms a double array to one string. The array fields are separated by the submitted
     * delimiter.
     *
     * @param s
     *            double array to transform
     * @param delimiter
     *            character placed between two fields
     * @return the joined string
     */
    public static String arrayToString( double[] s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.length * 20 );

        for ( int i = 0; i < s.length; i++ ) {
            if ( i > 0 ) {
                res.append( delimiter );
            }
            res.append( Double.toString( s[i] ) );
        }

        return res.toString();
    }

    /**
     * Transforms an int array to one string. The array fields are separated by the submitted
     * delimiter.
     *
     * @param s
     *            int array to transform
     * @param delimiter
     *            character placed between two fields
     * @return the joined string
     */
    public static String arrayToString( int[] s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.length * 20 );

        for ( int i = 0; i < s.length; i++ ) {
            if ( i > 0 ) {
                res.append( delimiter );
            }
            res.append( Integer.toString( s[i] ) );
        }

        return res.toString();
    }

    /**
     * Trims the string and removes all leading and trailing occurrences of the submitted mark.
     *
     * @param s
     *            string to validate
     * @param mark
     *            string to remove from begin and end of <code>s</code>; if it is
     *            <code>null</code> or empty, <code>s</code> is only trimmed
     * @return the cleaned string; <code>null</code> if <code>s</code> is <code>null</code>
     */
    public static String validateString( String s, String mark ) {
        if ( s == null ) {
            return null;
        }

        if ( s.length() == 0 ) {
            return s;
        }

        s = s.trim();

        // an empty mark is a prefix and suffix of every string; the loops below would never end
        if ( mark == null || mark.length() == 0 ) {
            return s;
        }

        while ( s.startsWith( mark ) ) {
            s = s.substring( mark.length() ).trim();
        }

        while ( s.endsWith( mark ) ) {
            s = s.substring( 0, s.length() - mark.length() ).trim();
        }

        return s;
    }

    /**
     * Deletes all duplicate entries from the submitted array; the first occurrence of each
     * value is kept and the original order is preserved.
     *
     * @param s
     *            array to clean
     * @return a new array without duplicates
     */
    public static String[] deleteDoubles( String[] s ) {
        Set<String> unique = new LinkedHashSet<String>( Arrays.asList( s ) );
        return unique.toArray( new String[unique.size()] );
    }

    /**
     * Removes all fields from the array that equal <code>s</code>.
     *
     * @param target
     *            array where to remove the submitted string
     * @param s
     *            string to remove; if <code>null</code>, <code>null</code> fields are removed
     * @return a new array without the matching fields
     */
    public static String[] removeFromArray( String[] target, String s ) {
        List<String> kept = new ArrayList<String>( target.length );

        for ( String entry : target ) {
            boolean matches = ( entry == null ) ? ( s == null ) : entry.equals( s );
            if ( !matches ) {
                kept.add( entry );
            }
        }

        return kept.toArray( new String[kept.size()] );
    }

    /**
     * Checks if the submitted array contains the string <code>value</code>, ignoring case. A
     * single trailing comma of <code>value</code> is ignored.
     *
     * @param target
     *            array to check if it contains <code>value</code>
     * @param value
     *            string to look for within the array
     * @return true if a matching field exists; false otherwise or if one of the arguments is
     *         <code>null</code>
     */
    public static boolean contains( String[] target, String value ) {
        if ( target == null || value == null ) {
            return false;
        }

        if ( value.endsWith( "," ) ) {
            value = value.substring( 0, value.length() - 1 );
        }

        for ( String entry : target ) {
            if ( value.equalsIgnoreCase( entry ) ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Splits the string at the delimiter characters and parses each token as double. Within a
     * token a comma is accepted as decimal separator and an inner blank is read as '+' sign;
     * surrounding whitespace is ignored and blank tokens are skipped.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters that separate the values (see {@link StringTokenizer})
     *
     * @return the parsed values; <code>null</code> if <code>s</code> is <code>null</code> or
     *         empty
     * @throws NumberFormatException
     *             if a token is not a parsable number
     */
    public static double[] toArrayDouble( String s, String delimiter ) {
        if ( s == null || s.length() == 0 ) {
            return null;
        }

        List<String> tokens = toNumberTokens( s, delimiter );
        double[] array = new double[tokens.size()];
        for ( int i = 0; i < array.length; i++ ) {
            array[i] = Double.parseDouble( tokens.get( i ) );
        }

        return array;
    }

    /**
     * Splits the string at the delimiter characters and parses each token as float. Within a
     * token a comma is accepted as decimal separator and an inner blank is read as '+' sign;
     * surrounding whitespace is ignored and blank tokens are skipped.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters that separate the values (see {@link StringTokenizer})
     *
     * @return the parsed values; <code>null</code> if <code>s</code> is <code>null</code> or
     *         empty
     * @throws NumberFormatException
     *             if a token is not a parsable number
     */
    public static float[] toArrayFloat( String s, String delimiter ) {
        if ( s == null || s.length() == 0 ) {
            return null;
        }

        List<String> tokens = toNumberTokens( s, delimiter );
        float[] array = new float[tokens.size()];
        for ( int i = 0; i < array.length; i++ ) {
            array[i] = Float.parseFloat( tokens.get( i ) );
        }

        return array;
    }

    /**
     * Tokenizes a string of numbers and normalizes each token for parsing.
     *
     * @param s
     *            string to split
     * @param delimiter
     *            characters that separate the values
     * @return the normalized, non-blank tokens
     */
    private static List<String> toNumberTokens( String s, String delimiter ) {
        StringTokenizer st = new StringTokenizer( s, delimiter );
        List<String> tokens = new ArrayList<String>( st.countTokens() );

        while ( st.hasMoreTokens() ) {
            // trim before substituting blanks: otherwise a trailing blank turns "1.5 " into
            // the unparsable "1.5+"
            String token = st.nextToken().trim();
            if ( token.length() > 0 ) {
                tokens.add( token.replace( ' ', '+' ).replace( ',', '.' ) );
            }
        }

        return tokens;
    }

    /**
     * Transforms an array of StackTraceElements into a String, one line per element.
     *
     * @param se
     *            stack trace elements to print
     * @return the formatted stack trace
     */
    public static String stackTraceToString( StackTraceElement[] se ) {

        StringBuilder sb = new StringBuilder();
        for ( StackTraceElement element : se ) {
            appendFrame( sb, element );
        }
        return sb.toString();
    }

    /**
     * Builds a short description of the passed Throwable: its message, its class name and at
     * most the first six elements of its stack trace.
     *
     * @param e
     *            throwable to describe
     * @return the formatted description
     */
    public static String stackTraceToString( Throwable e ) {

        StackTraceElement[] se = e.getStackTrace();
        StringBuilder sb = new StringBuilder();
        sb.append( e.getMessage() ).append( "\n" );
        sb.append( e.getClass().getName() ).append( "\n" );
        // only the top of the stack is reported
        for ( int i = 0; i < se.length && i < 6; i++ ) {
            appendFrame( sb, se[i] );
        }
        return sb.toString();
    }

    /**
     * Appends one stack frame as "class file method(line)" followed by a line feed.
     *
     * @param sb
     *            builder to append to
     * @param element
     *            frame to append
     */
    private static void appendFrame( StringBuilder sb, StackTraceElement element ) {
        sb.append( element.getClassName() ).append( " " );
        sb.append( element.getFileName() ).append( " " );
        sb.append( element.getMethodName() ).append( "(" );
        sb.append( element.getLineNumber() ).append( ")\n" );
    }

    /**
     * Counts the occurrences of token within target. Overlapping occurrences are counted
     * separately ("aaa" contains "aa" twice).
     *
     * @param target
     *            string to search in
     * @param token
     *            string to search for
     *
     * @return the number of occurrences; 0 if <code>token</code> is empty
     */
    public static int countString( String target, String token ) {
        // indexOf never returns -1 for an empty search string, so the loop would not end
        if ( token.length() == 0 ) {
            return 0;
        }

        int count = 0;
        int start = target.indexOf( token );

        while ( start != -1 ) {
            count++;
            start = target.indexOf( token, start + 1 );
        }

        return count;
    }

    /**
     * Extracts all the strings that begin with "startString" and end with "endString" and
     * stores them into an array of String.
     * <p>
     * If both delimiters are equal, the occurrences are taken pairwise and the returned strings
     * still include the delimiters. If they differ, the delimiters are removed from the
     * returned strings. Start delimiters without a following end delimiter are ignored.
     *
     * @param target
     *            string to search in
     * @param startString
     *            delimiter that opens a string
     * @param endString
     *            delimiter that closes a string
     *
     * @return the extracted strings; <code>null</code> if <code>startString</code> does not
     *         occur in <code>target</code>
     */
    public static String[] extractStrings( String target, String startString, String endString ) {
        int start = target.indexOf( startString );

        if ( start == -1 ) {
            return null;
        }

        int count = countString( target, startString );
        // the search for the closing delimiter begins behind the opening one
        int skip = Math.max( 1, startString.length() );
        List<String> found = new ArrayList<String>( count );

        if ( startString.equals( endString ) ) {
            // identical delimiters: every second occurrence closes the previous one
            int pairs = count / 2;
            for ( int i = 0; i < pairs && start != -1; i++ ) {
                int end = target.indexOf( endString, start + skip );
                if ( end == -1 ) {
                    break;
                }
                int next = end + endString.length();
                found.add( target.substring( start, next ) );
                start = target.indexOf( startString, next );
            }
        } else {
            for ( int i = 0; i < count && start != -1; i++ ) {
                int end = target.indexOf( endString, start + skip );
                if ( end == -1 ) {
                    // neither this nor any later start delimiter is closed
                    break;
                }
                String candidate = target.substring( start, end + endString.length() );
                found.add( extractString( candidate, startString, endString, true, true ) );
                start = target.indexOf( startString, start + 1 );
            }
        }

        return found.toArray( new String[found.size()] );
    }

    /**
     * Extracts the first string contained between startDel and endDel. The delimiters are
     * removed from the result if the parameters delStart and delEnd are set to true.
     *
     * @param target
     *            string to search in
     * @param startDel
     *            delimiter that opens the string
     * @param endDel
     *            delimiter that closes the string
     * @param delStart
     *            if true, leading start delimiters are removed from the result
     * @param delEnd
     *            if true, trailing end delimiters are removed from the result
     *
     * @return the trimmed string; <code>null</code> if the start delimiter, or an end delimiter
     *         behind it, does not occur in <code>target</code>
     */
    public static String extractString( String target, String startDel, String endDel,
                                        boolean delStart, boolean delEnd ) {
        int start = target.indexOf( startDel );

        if ( start == -1 ) {
            return null;
        }

        int end = target.indexOf( endDel, start + Math.max( 1, startDel.length() ) );
        if ( end == -1 ) {
            return null;
        }

        String s = target.substring( start, end + endDel.length() ).trim();

        // empty delimiters are skipped: they match forever in startsWith()/endsWith()
        if ( delStart && startDel.length() > 0 ) {
            while ( s.startsWith( startDel ) ) {
                s = s.substring( startDel.length() ).trim();
            }
        }

        if ( delEnd && endDel.length() > 0 ) {
            while ( s.endsWith( endDel ) ) {
                s = s.substring( 0, s.length() - endDel.length() ).trim();
            }
        }

        return s;
    }

    /**
     * Initializes the substitution map with all normalization rules for a given locale and
     * adds this map to the static localeMap.
     * <p>
     * The rules are read from the resource "normalization.xml", which is looked up at the
     * classpath root first and next to this class second. Besides the rules, the map holds the
     * reserved keys "case" and "removeDoubles" with the values of the attributes of the same
     * name.
     *
     * @param locale
     *            the locale language whose rules are loaded, e.g. "de"
     * @throws IOException
     *             if the resource cannot be found or read
     * @throws SAXException
     * @throws XMLParsingException
     */
    private static void initMap( String locale )
                            throws IOException, SAXException, XMLParsingException {

        // locate the normalization file
        URL url = StringTools.class.getResource( "/normalization.xml" );
        if ( url == null ) {
            url = StringTools.class.getResource( "normalization.xml" );
        }
        if ( url == null ) {
            throw new IOException( "Resource 'normalization.xml' could not be found." );
        }

        // read normalization file
        // NOTE(review): the file is decoded with the platform default charset and line
        // terminators are dropped, as before -- confirm this matches the file's encoding
        StringBuilder sb = new StringBuilder( 1000 );
        InputStream is = url.openStream();
        BufferedReader br = new BufferedReader( new InputStreamReader( is ) );
        try {
            String line;
            while ( ( line = br.readLine() ) != null ) {
                sb.append( line );
            }
        } finally {
            br.close();
        }

        // transform into xml fragment; the system id is the resource that was actually read
        XMLFragment xml = new XMLFragment();
        xml.load( new StringReader( sb.toString() ), url.toString() );

        // create map; insertion order is kept so that rules are applied in document order
        Map<String, String> substitutionMap = new LinkedHashMap<String, String>( 20 );

        String localeXPath = "Locale[@name = '" + locale + "']";

        // extract case attrib ( "toLower" or "toUpper" or missing ) for passed locale
        String letterCase = XMLTools.getNodeAsString( xml.getRootElement(),
                                                      localeXPath + "/@case",
                                                      CommonNamespaces.getNamespaceContext(), null );
        if ( letterCase != null ) {
            substitutionMap.put( "case", letterCase );
        }

        // extract removeDoubles attrib for passed locale
        String removeDoubles = XMLTools.getNodeAsString( xml.getRootElement(),
                                                         localeXPath + "/@removeDoubles",
                                                         CommonNamespaces.getNamespaceContext(),
                                                         null );
        if ( removeDoubles != null && removeDoubles.length() > 0 ) {
            substitutionMap.put( "removeDoubles", removeDoubles );
        }

        // extract rules section for passed locale
        List<?> list = XMLTools.getNodes( xml.getRootElement(), localeXPath + "/Rule",
                                          CommonNamespaces.getNamespaceContext() );
        if ( list != null ) {
            for ( Object rule : list ) {
                String src = XMLTools.getRequiredNodeAsString( (Node) rule, "Source",
                                                               CommonNamespaces.getNamespaceContext() );
                String target = XMLTools.getRequiredNodeAsString( (Node) rule, "Target",
                                                                  CommonNamespaces.getNamespaceContext() );
                substitutionMap.put( src, target );
            }
        }

        // init localeMap if needed
        if ( localeMap == null ) {
            localeMap = new HashMap<String, Map<String, String>>( 20 );
        }

        localeMap.put( locale, substitutionMap );
    }

    /**
     * The passed string gets normalized along the rules for the given locale as they are set in
     * the file "normalization.xml". The rules of a locale are loaded on first use and cached.
     * If such rules are specified, the following order is obeyed:
     *
     * <ol>
     * <li>if the attribute "case" is set with "toLower" or "toUpper", the letters are switched
     * to lower case or to upper case respectively, using the casing rules of the passed
     * locale language.</li>
     * <li>all rules given in the "Rule" elements are performed in document order; the source of
     * a rule is applied as regular expression (see {@link String#replaceAll(String, String)}).</li>
     * <li>if the attribute "removeDoubles" is set and not empty, all multiple occurrences of
     * the letters given in this attribute are reduced to a single occurrence.</li>
     * </ol>
     *
     * @param source
     *            the String to normalize
     * @param locale
     *            the locale language defining the rules to choose, e.g. "de"
     * @return the normalized String; <code>null</code> if <code>source</code> is
     *         <code>null</code>
     * @throws IOException
     * @throws SAXException
     * @throws XMLParsingException
     */
    public static String normalizeString( String source, String locale )
                            throws IOException, SAXException, XMLParsingException {

        if ( source == null ) {
            return null;
        }

        if ( localeMap == null || localeMap.get( locale ) == null ) {
            initMap( locale );
        }
        Map<String, String> substitutionMap = localeMap.get( locale );

        String letterCase = substitutionMap.get( "case" );
        String doubles = substitutionMap.get( "removeDoubles" );
        String output = source;

        // first: change letters to upper / lower case, independent of the JVM default locale
        Locale caseLocale = ( locale == null ) ? Locale.getDefault() : new Locale( locale );
        if ( "toUpper".equals( letterCase ) ) {
            output = output.toUpperCase( caseLocale );
        } else if ( "toLower".equals( letterCase ) ) {
            output = output.toLowerCase( caseLocale );
        }

        // second: change string according to specified rules
        for ( Map.Entry<String, String> rule : substitutionMap.entrySet() ) {
            String key = rule.getKey();
            if ( !"case".equals( key ) && !"removeDoubles".equals( key ) ) {
                output = output.replaceAll( key, rule.getValue() );
            }
        }

        // third: remove doubles
        if ( doubles != null && doubles.length() > 0 ) {
            for ( int i = 0; i < doubles.length(); i++ ) {
                // quote the letter so that characters like '.' or '$' are taken literally
                String single = String.valueOf( doubles.charAt( i ) );
                output = output.replaceAll( Pattern.quote( single ) + "+",
                                            Matcher.quoteReplacement( single ) );
            }
        }
        return output;
    }
}