//$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/framework/util/StringTools.java $
/*---------------- FILE HEADER ------------------------------------------

 This file is part of deegree.
 Copyright (C) 2001-2008 by:
 EXSE, Department of Geography, University of Bonn
 http://www.giub.uni-bonn.de/deegree/
 lat/lon GmbH
 http://www.lat-lon.de

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

 Contact:

 Andreas Poth
 lat/lon GmbH
 Aennchenstr. 19
 53115 Bonn
 Germany
 E-Mail: poth@lat-lon.de

 Prof. Dr. Klaus Greve
 Department of Geography
 University of Bonn
 Meckenheimer Allee 166
 53115 Bonn
 Germany
 E-Mail: greve@giub.uni-bonn.de


 ---------------------------------------------------------------------------*/
package org.deegree.framework.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.deegree.framework.xml.XMLFragment;
import org.deegree.framework.xml.XMLParsingException;
import org.deegree.framework.xml.XMLTools;
import org.deegree.ogcbase.CommonNamespaces;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

/**
 * A collection of static helper methods that extend the functionality of the standard Java
 * {@link String} class.
 *
 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
 * @author last edited by: $Author: apoth $
 *
 * @version $Revision: 9339 $, $Date: 2007-12-27 13:31:52 +0100 (Do, 27 Dez 2007) $
 */
public class StringTools {

    /** Map key under which the letter case rule ("toLower" / "toUpper") of a locale is stored. */
    private static final String KEY_CASE = "case";

    /** Map key under which the characters whose repetitions get collapsed are stored. */
    private static final String KEY_REMOVE_DOUBLES = "removeDoubles";

    /** Number of stack frames written by {@link #stackTraceToString(Throwable)}. */
    private static final int MAX_TRACE_ELEMENTS = 6;

    /**
     * This map is used by the methods normalizeString() and initMap().
     *
     * key = locale language, e.g. "de"; value = map of substitution rules for this locale
     */
    private static Map<String, Map<String, String>> localeMap;

    /**
     * Concatenates the string representations of the passed objects using a
     * {@link StringBuilder}.
     *
     * @param size
     *            estimated size of the target string; negative values are treated as 0
     * @param objects
     *            toString() will be called for each object to append it to the result string
     * @return the concatenation of all objects, in the order passed
     */
    public static String concat( int size, Object... objects ) {
        StringBuilder sb = new StringBuilder( Math.max( 0, size ) );
        for ( Object object : objects ) {
            sb.append( object );
        }
        return sb.toString();
    }

    /**
     * Replaces occurrences of a string fragment within a string by a new string.
     * <p>
     * If <code>all</code> is true and <code>to</code> does not itself contain
     * <code>from</code>, the replacement is repeated on the result until no occurrence of
     * <code>from</code> is left (e.g. replacing "aa" by "a" in "aaaa" yields "a").
     *
     * @param target
     *            is the original string
     * @param from
     *            is the string to be replaced; if empty, <code>target</code> is returned
     *            unchanged
     * @param to
     *            is the string which will be used as replacement
     * @param all
     *            if true all occurrences of the string to be replaced will be replaced, else
     *            only the first occurrence will be replaced.
     * @return the changed target string
     */
    public static String replace( String target, String from, String to, boolean all ) {
        int fromLength = from.length();
        if ( fromLength == 0 ) {
            // an empty pattern matches everywhere; looping over it would never terminate
            return target;
        }

        int start = target.indexOf( from );
        if ( start == -1 ) {
            return target;
        }

        if ( !all ) {
            // exactly one replacement; the result must not be scanned again
            return target.substring( 0, start ) + to + target.substring( start + fromLength );
        }

        // a replacement that contains the pattern would re-create it on every pass
        boolean rescan = to.indexOf( from ) < 0;
        StringBuilder buffer = new StringBuilder( target.length() );
        do {
            buffer.setLength( 0 );
            int copyFrom = 0;
            while ( start != -1 ) {
                buffer.append( target, copyFrom, start );
                buffer.append( to );
                copyFrom = start + fromLength;
                start = target.indexOf( from, copyFrom );
            }
            buffer.append( target, copyFrom, target.length() );
            target = buffer.toString();
            start = rescan ? target.indexOf( from ) : -1;
        } while ( start != -1 );

        return target;
    }

    /**
     * Parses a string and returns its (trimmed) tokens as array.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters that mark the end of a token (see {@link StringTokenizer})
     * @param deleteDoubles
     *            if true, tokens that are already contained in the result are dropped, so that
     *            there will only be one copy of them.
     * @return the tokens; an empty array if <code>s</code> is null or empty; an array holding
     *         just <code>s</code> if <code>s</code> contains no token at all
     */
    public static String[] toArray( String s, String delimiter, boolean deleteDoubles ) {
        if ( s == null || s.length() == 0 ) {
            return new String[0];
        }

        StringTokenizer st = new StringTokenizer( s, delimiter );
        List<String> tokens = new ArrayList<String>( st.countTokens() );

        if ( st.hasMoreTokens() ) {
            // StringTokenizer never delivers null or empty tokens
            while ( st.hasMoreTokens() ) {
                tokens.add( st.nextToken().trim() );
            }
        } else {
            tokens.add( s );
        }

        String[] result = tokens.toArray( new String[tokens.size()] );
        return deleteDoubles ? deleteDoubles( result ) : result;
    }

    /**
     * Parses a string and returns its (trimmed) tokens as typed List.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters that mark the end of a token (see {@link StringTokenizer})
     * @param deleteDoubles
     *            if true, tokens that are already contained in the result are dropped, so that
     *            there will only be one copy of them.
     * @return the tokens in their original order; an empty list if <code>s</code> is null or
     *         empty
     */
    public static List<String> toList( String s, String delimiter, boolean deleteDoubles ) {
        if ( s == null || s.length() == 0 ) {
            return new ArrayList<String>();
        }

        StringTokenizer st = new StringTokenizer( s, delimiter );
        ArrayList<String> tokens = new ArrayList<String>( st.countTokens() );
        Set<String> seen = new HashSet<String>();
        while ( st.hasMoreTokens() ) {
            String token = st.nextToken().trim();
            // Set.add() returns false for tokens that have been collected before
            if ( !deleteDoubles || seen.add( token ) ) {
                tokens.add( token );
            }
        }

        return tokens;
    }

    /**
     * Transforms a string array to one string. The array fields are separated by the submitted
     * delimiter.
     *
     * @param s
     *            string array to transform
     * @param delimiter
     *            character written between two fields
     * @return the joined string
     */
    public static String arrayToString( String[] s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.length * 20 );
        for ( int i = 0; i < s.length; i++ ) {
            if ( i > 0 ) {
                res.append( delimiter );
            }
            res.append( s[i] );
        }
        return res.toString();
    }

    /**
     * Transforms a list to one string. The list elements are separated by the submitted
     * delimiter.
     *
     * @param s
     *            list to transform; toString() is used for each element
     * @param delimiter
     *            character written between two elements
     * @return the joined string
     */
    public static String listToString( List<?> s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.size() * 20 );
        boolean first = true;
        // iterate instead of get(i), which is expensive on linked lists
        for ( Object element : s ) {
            if ( !first ) {
                res.append( delimiter );
            }
            res.append( element );
            first = false;
        }
        return res.toString();
    }

    /**
     * Transforms a double array to one string. The array fields are separated by the submitted
     * delimiter.
     *
     * @param s
     *            double array to transform
     * @param delimiter
     *            character written between two fields
     * @return the joined string
     */
    public static String arrayToString( double[] s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.length * 20 );
        for ( int i = 0; i < s.length; i++ ) {
            if ( i > 0 ) {
                res.append( delimiter );
            }
            res.append( Double.toString( s[i] ) );
        }
        return res.toString();
    }

    /**
     * Transforms an int array to one string. The array fields are separated by the submitted
     * delimiter.
     *
     * @param s
     *            int array to transform
     * @param delimiter
     *            character written between two fields
     * @return the joined string
     */
    public static String arrayToString( int[] s, char delimiter ) {
        StringBuilder res = new StringBuilder( s.length * 20 );
        for ( int i = 0; i < s.length; i++ ) {
            if ( i > 0 ) {
                res.append( delimiter );
            }
            res.append( Integer.toString( s[i] ) );
        }
        return res.toString();
    }

    /**
     * Removes all leading and trailing occurrences of <code>mark</code> (and surrounding
     * whitespace) from a string.
     *
     * @param s
     *            string to validate
     * @param mark
     *            string to remove from begin and end of <code>s</code>; if null or empty,
     *            <code>s</code> is only trimmed
     * @return the cleaned string; null if <code>s</code> is null
     */
    public static String validateString( String s, String mark ) {
        if ( s == null ) {
            return null;
        }
        if ( s.length() == 0 ) {
            return s;
        }

        s = s.trim();
        if ( mark == null || mark.length() == 0 ) {
            // every string starts with "", so stripping an empty mark would loop forever
            return s;
        }

        while ( s.startsWith( mark ) ) {
            s = s.substring( mark.length() ).trim();
        }
        while ( s.endsWith( mark ) ) {
            s = s.substring( 0, s.length() - mark.length() ).trim();
        }

        return s;
    }

    /**
     * Deletes all double entries from the submitted array.
     *
     * @param s
     *            array to clean
     * @return a new array holding each distinct value once, in order of first occurrence
     */
    public static String[] deleteDoubles( String[] s ) {
        // LinkedHashSet drops duplicates and keeps the insertion order
        Set<String> unique = new LinkedHashSet<String>( Math.max( 16, s.length * 2 ) );
        for ( String value : s ) {
            unique.add( value );
        }
        return unique.toArray( new String[unique.size()] );
    }

    /**
     * Removes all fields from the array that equal <code>s</code>.
     *
     * @param target
     *            array from which the submitted string is removed
     * @param s
     *            string to remove
     * @return a new array without the fields equal to <code>s</code>
     */
    public static String[] removeFromArray( String[] target, String s ) {
        List<String> kept = new ArrayList<String>( target.length );
        for ( String value : target ) {
            // null-safe comparison: a null field must not raise a NullPointerException
            boolean matches = ( value == null ) ? ( s == null ) : value.equals( s );
            if ( !matches ) {
                kept.add( value );
            }
        }
        return kept.toArray( new String[kept.size()] );
    }

    /**
     * Checks if the submitted array contains the string <code>value</code>. The comparison
     * ignores case; one trailing comma of <code>value</code> is ignored as well.
     *
     * @param target
     *            array to check if it contains <code>value</code>
     * @param value
     *            string to look for within the array
     * @return true if a matching field exists; false otherwise or if one of the arguments is
     *         null
     */
    public static boolean contains( String[] target, String value ) {
        if ( target == null || value == null ) {
            return false;
        }

        String wanted = value.endsWith( "," ) ? value.substring( 0, value.length() - 1 ) : value;
        for ( String candidate : target ) {
            if ( wanted.equalsIgnoreCase( candidate ) ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts a delimited list of numbers, e.g. taken from [(x1,y1),(x2,y2)...], into an array
     * of double values [x1,y1,x2,y2...]. A comma inside a token is read as decimal separator.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters separating the numbers (see {@link StringTokenizer})
     * @return the parsed values; <code>null</code> if <code>s</code> is null or empty
     * @throws NumberFormatException
     *             if a token is not a valid number
     */
    public static double[] toArrayDouble( String s, String delimiter ) {
        if ( s == null || s.length() == 0 ) {
            return null;
        }

        List<String> tokens = toNumberTokens( s, delimiter );
        double[] array = new double[tokens.size()];
        for ( int i = 0; i < array.length; i++ ) {
            array[i] = Double.parseDouble( tokens.get( i ) );
        }
        return array;
    }

    /**
     * Converts a delimited list of numbers, e.g. taken from [(x1,y1),(x2,y2)...], into an array
     * of float values [x1,y1,x2,y2...]. A comma inside a token is read as decimal separator.
     *
     * @param s
     *            string to parse
     * @param delimiter
     *            characters separating the numbers (see {@link StringTokenizer})
     * @return the parsed values; <code>null</code> if <code>s</code> is null or empty
     * @throws NumberFormatException
     *             if a token is not a valid number
     */
    public static float[] toArrayFloat( String s, String delimiter ) {
        if ( s == null || s.length() == 0 ) {
            return null;
        }

        List<String> tokens = toNumberTokens( s, delimiter );
        float[] array = new float[tokens.size()];
        for ( int i = 0; i < array.length; i++ ) {
            array[i] = Float.parseFloat( tokens.get( i ) );
        }
        return array;
    }

    /**
     * Splits a string into tokens and prepares each token for number parsing.
     */
    private static List<String> toNumberTokens( String s, String delimiter ) {
        StringTokenizer st = new StringTokenizer( s, delimiter );
        List<String> tokens = new ArrayList<String>( st.countTokens() );
        while ( st.hasMoreTokens() ) {
            // NOTE(review): blanks become '+', presumably to restore a plus sign that was
            // turned into a blank by URL decoding -- confirm against callers.
            String token = st.nextToken().replace( ' ', '+' );
            tokens.add( token.trim().replace( ',', '.' ) );
        }
        return tokens;
    }

    /**
     * Transforms an array of StackTraceElements into a String, one line per element.
     *
     * @param se
     *            stack trace elements to write
     * @return lines of the form "className fileName methodName(lineNumber)"
     */
    public static String stackTraceToString( StackTraceElement[] se ) {
        StringBuilder sb = new StringBuilder();
        appendStackTrace( sb, se, se.length );
        return sb.toString();
    }

    /**
     * Writes the message, the class name and the first stack trace elements (at most six) of
     * the passed Throwable into a String.
     *
     * @param e
     *            the Throwable to describe
     * @return the textual description
     */
    public static String stackTraceToString( Throwable e ) {
        StringBuilder sb = new StringBuilder();
        sb.append( e.getMessage() ).append( "\n" );
        sb.append( e.getClass().getName() ).append( "\n" );
        appendStackTrace( sb, e.getStackTrace(), MAX_TRACE_ELEMENTS );
        return sb.toString();
    }

    /**
     * Appends at most <code>limit</code> stack trace elements to the passed builder.
     */
    private static void appendStackTrace( StringBuilder sb, StackTraceElement[] se, int limit ) {
        int n = Math.min( se.length, limit );
        for ( int i = 0; i < n; i++ ) {
            sb.append( se[i].getClassName() ).append( ' ' );
            sb.append( se[i].getFileName() ).append( ' ' );
            sb.append( se[i].getMethodName() ).append( '(' );
            sb.append( se[i].getLineNumber() ).append( ")\n" );
        }
    }

    /**
     * Counts the occurrences of token in target. Overlapping occurrences are counted
     * separately, e.g. "aa" occurs twice in "aaa".
     *
     * @param target
     *            string to search in
     * @param token
     *            string to search for
     * @return the number of occurrences; 0 if <code>token</code> is empty
     */
    public static int countString( String target, String token ) {
        if ( token.length() == 0 ) {
            // indexOf( "" ) never returns -1, so the loop below would not terminate
            return 0;
        }

        int count = 0;
        int start = target.indexOf( token );
        while ( start != -1 ) {
            count++;
            start = target.indexOf( token, start + 1 );
        }
        return count;
    }

    /**
     * Extracts all the strings that begin with <code>startString</code> and end with
     * <code>endString</code> and stores them into an array of String.
     * <p>
     * If both delimiters are equal, the occurrences are consumed pairwise and the returned
     * fragments still include the delimiters. If they differ, the delimiters are removed from
     * the returned fragments. Extraction stops at the first start delimiter that is not
     * followed by an end delimiter.
     *
     * @param target
     *            string to search in
     * @param startString
     *            delimiter marking the begin of a fragment
     * @param endString
     *            delimiter marking the end of a fragment
     *
     * @return <code>null</code> if <code>startString</code> does not occur in
     *         <code>target</code>, else the extracted fragments
     */
    public static String[] extractStrings( String target, String startString, String endString ) {
        int start = target.indexOf( startString );
        if ( start == -1 ) {
            return null;
        }

        boolean sameDelimiter = startString.equals( endString );
        int count = countString( target, startString );
        if ( sameDelimiter ) {
            // each fragment consumes two delimiter occurrences
            count = count / 2;
        }

        // always advance by at least one character when looking for the end delimiter
        int startLength = Math.max( 1, startString.length() );
        int endLength = endString.length();

        List<String> fragments = new ArrayList<String>( count );
        for ( int i = 0; i < count && start != -1; i++ ) {
            int end = target.indexOf( endString, start + startLength );
            if ( end == -1 ) {
                // unterminated fragment: nothing more can be extracted
                break;
            }
            String fragment = target.substring( start, end + endLength );
            if ( sameDelimiter ) {
                fragments.add( fragment );
                start = target.indexOf( startString, end + endLength );
            } else {
                fragments.add( extractString( fragment, startString, endString, true, true ) );
                start = target.indexOf( startString, start + 1 );
            }
        }

        return fragments.toArray( new String[fragments.size()] );
    }

    /**
     * Extracts the first string contained between <code>startDel</code> and
     * <code>endDel</code>. The delimiters are removed from the result if
     * <code>delStart</code> respectively <code>delEnd</code> is set to true.
     *
     * @param target
     *            string to search in
     * @param startDel
     *            delimiter marking the begin of the fragment
     * @param endDel
     *            delimiter marking the end of the fragment
     * @param delStart
     *            true if leading start delimiters shall be removed from the result
     * @param delEnd
     *            true if trailing end delimiters shall be removed from the result
     *
     * @return the trimmed fragment; <code>null</code> if <code>startDel</code> is not found or
     *         is not followed by <code>endDel</code>
     */
    public static String extractString( String target, String startDel, String endDel,
                                        boolean delStart, boolean delEnd ) {
        int start = target.indexOf( startDel );
        if ( start == -1 ) {
            return null;
        }

        // search behind the start delimiter, but always advance by at least one character
        int end = target.indexOf( endDel, start + Math.max( 1, startDel.length() ) );
        if ( end == -1 ) {
            return null;
        }

        String s = target.substring( start, end + endDel.length() ).trim();

        // empty delimiters are skipped: startsWith( "" ) / endsWith( "" ) are always true
        if ( delStart && startDel.length() > 0 ) {
            while ( s.startsWith( startDel ) ) {
                s = s.substring( startDel.length() ).trim();
            }
        }
        if ( delEnd && endDel.length() > 0 ) {
            while ( s.endsWith( endDel ) ) {
                s = s.substring( 0, s.length() - endDel.length() ).trim();
            }
        }

        return s;
    }

    /**
     * Initializes the substitution map with all normalization rules for a given locale and adds
     * this map to the static localeMap.
     *
     * @param locale
     *            the locale language whose rules shall be read, e.g. "de"
     * @throws IOException
     *             if the resource "normalization.xml" cannot be found or read
     * @throws SAXException
     * @throws XMLParsingException
     */
    private static void initMap( String locale )
                            throws IOException, SAXException, XMLParsingException {

        // locate the normalization file: classpath root first, then next to this class
        URL url = StringTools.class.getResource( "/normalization.xml" );
        if ( url == null ) {
            url = StringTools.class.getResource( "normalization.xml" );
        }
        if ( url == null ) {
            throw new IOException( "Resource 'normalization.xml' could not be found." );
        }

        // read normalization file (lines are joined without separator; platform charset)
        StringBuilder sb = new StringBuilder( 1000 );
        InputStream is = url.openStream();
        BufferedReader br = new BufferedReader( new InputStreamReader( is ) );
        try {
            String line = null;
            while ( ( line = br.readLine() ) != null ) {
                sb.append( line );
            }
        } finally {
            br.close();
        }

        // transform into xml fragment; the system id is the URL the file was read from
        XMLFragment xml = new XMLFragment();
        xml.load( new StringReader( sb.toString() ), url.toString() );

        // create map; insertion order is kept so that rules are applied in document order
        Map<String, String> substitutionMap = new LinkedHashMap<String, String>( 20 );
        String localeXPath = "Locale[@name = '" + locale + "']";

        // extract case attrib ( "toLower" or "toUpper" or missing ) for passed locale
        String letterCase = XMLTools.getNodeAsString( xml.getRootElement(),
                                                      localeXPath + "/@case",
                                                      CommonNamespaces.getNamespaceContext(),
                                                      null );
        if ( letterCase != null ) {
            substitutionMap.put( KEY_CASE, letterCase );
        }

        // extract removeDoubles attrib for passed locale
        String removeDoubles = XMLTools.getNodeAsString( xml.getRootElement(),
                                                         localeXPath + "/@removeDoubles",
                                                         CommonNamespaces.getNamespaceContext(),
                                                         null );
        if ( removeDoubles != null && removeDoubles.length() > 0 ) {
            substitutionMap.put( KEY_REMOVE_DOUBLES, removeDoubles );
        }

        // extract rules section for passed locale
        List<?> rules = XMLTools.getNodes( xml.getRootElement(), localeXPath + "/Rule",
                                           CommonNamespaces.getNamespaceContext() );
        if ( rules != null ) {
            for ( Object rule : rules ) {
                String src = XMLTools.getRequiredNodeAsString( (Node) rule, "Source",
                                                               CommonNamespaces.getNamespaceContext() );
                String target = XMLTools.getRequiredNodeAsString( (Node) rule, "Target",
                                                                  CommonNamespaces.getNamespaceContext() );
                substitutionMap.put( src, target );
            }
        }

        // init localeMap if needed
        if ( localeMap == null ) {
            localeMap = new HashMap<String, Map<String, String>>( 20 );
        }

        localeMap.put( locale, substitutionMap );
    }

    /**
     * The passed string gets normalized along the rules for the given locale as they are set in
     * the file "./normalization.xml". If such rules are specified, the following order is
     * obeyed:
     *
     * <ol>
     * <li>if the attribute "case" is set with "toLower" or "toUpper", the letters are switched
     * to lower case or to upper case respectively.</li>
     * <li>all rules given in the "Rule" elements are performed; the source of a rule is
     * applied as regular expression.</li>
     * <li>if the attribute "removeDoubles" is set and not empty, all multi occurrences of the
     * letters given in this attribute are reduced to a single occurrence.</li>
     * </ol>
     *
     * @param source
     *            the String to normalize
     * @param locale
     *            the locale language defining the rules to choose, e.g. "de"
     * @return the normalized String
     * @throws IOException
     * @throws SAXException
     * @throws XMLParsingException
     */
    public static String normalizeString( String source, String locale )
                            throws IOException, SAXException, XMLParsingException {

        if ( localeMap == null ) {
            localeMap = new HashMap<String, Map<String, String>>( 20 );
        }
        Map<String, String> substitutionMap = localeMap.get( locale );
        if ( substitutionMap == null ) {
            // rules of this locale have not been loaded yet
            initMap( locale );
            substitutionMap = localeMap.get( locale );
        }

        String output = source;

        // first: change letters to upper / lower case
        String letterCase = substitutionMap.get( KEY_CASE );
        if ( "toUpper".equals( letterCase ) ) {
            output = output.toUpperCase();
        } else if ( "toLower".equals( letterCase ) ) {
            output = output.toLowerCase();
        }

        // second: change string according to specified rules
        for ( Map.Entry<String, String> rule : substitutionMap.entrySet() ) {
            String key = rule.getKey();
            if ( !KEY_CASE.equals( key ) && !KEY_REMOVE_DOUBLES.equals( key ) ) {
                output = output.replaceAll( key, rule.getValue() );
            }
        }

        // third: remove doubles
        String doubles = substitutionMap.get( KEY_REMOVE_DOUBLES );
        if ( doubles != null ) {
            for ( int i = 0; i < doubles.length(); i++ ) {
                String letter = String.valueOf( doubles.charAt( i ) );
                // quote the letter: characters like '.' or '*' must not act as regex syntax
                output = output.replaceAll( Pattern.quote( letter ) + "+",
                                            Matcher.quoteReplacement( letter ) );
            }
        }
        return output;
    }
}