001 //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/framework/util/StringTools.java $
002 /*---------------- FILE HEADER ------------------------------------------
003
004 This file is part of deegree.
005 Copyright (C) 2001-2008 by:
006 EXSE, Department of Geography, University of Bonn
007 http://www.giub.uni-bonn.de/deegree/
008 lat/lon GmbH
009 http://www.lat-lon.de
010
011 This library is free software; you can redistribute it and/or
012 modify it under the terms of the GNU Lesser General Public
013 License as published by the Free Software Foundation; either
014 version 2.1 of the License, or (at your option) any later version.
015
016 This library is distributed in the hope that it will be useful,
017 but WITHOUT ANY WARRANTY; without even the implied warranty of
018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 Lesser General Public License for more details.
020
021 You should have received a copy of the GNU Lesser General Public
022 License along with this library; if not, write to the Free Software
023 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024
025 Contact:
026
027 Andreas Poth
028 lat/lon GmbH
029 Aennchenstr. 19
030 53115 Bonn
031 Germany
032 E-Mail: poth@lat-lon.de
033
034 Prof. Dr. Klaus Greve
035 Department of Geography
036 University of Bonn
037 Meckenheimer Allee 166
038 53115 Bonn
039 Germany
040 E-Mail: greve@giub.uni-bonn.de
041
042
043 ---------------------------------------------------------------------------*/
044 package org.deegree.framework.util;
045
046 import java.io.BufferedReader;
047 import java.io.IOException;
048 import java.io.InputStream;
049 import java.io.InputStreamReader;
050 import java.io.StringReader;
051 import java.util.ArrayList;
052 import java.util.HashMap;
053 import java.util.List;
054 import java.util.Locale;
055 import java.util.Map;
056 import java.util.Set;
057 import java.util.StringTokenizer;
058
059 import org.deegree.framework.xml.XMLFragment;
060 import org.deegree.framework.xml.XMLParsingException;
061 import org.deegree.framework.xml.XMLTools;
062 import org.deegree.ogcbase.CommonNamespaces;
063 import org.w3c.dom.Node;
064 import org.xml.sax.SAXException;
065
066 /**
067 * This is a collection of utility methods that extend the functionality of the standard Java
068 * {@link String} class.
069 *
070 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
071 * @author last edited by: $Author: apoth $
072 *
073 * @version $Revision: 9339 $, $Date: 2007-12-27 13:31:52 +0100 (Do, 27 Dez 2007) $
074 */
075 public class StringTools {
076
077 /**
078 * This map is used for methods normalizeString() and initMap().
079 *
080 * key = locale language, e.g. "de"
081 * value = map of substitution rules for this locale
082 */
083 private static Map<String, Map<String, String>> localeMap;
084
085 /**
086 * concatenates an array of strings using a
087 *
088 * @see StringBuffer
089 *
090 * @param size
091 * estimated size of the target string
092 * @param objects
093 * toString() will be called for each object to append it to the result string
094 * @return
095 */
096 public static String concat( int size, Object... objects ) {
097 StringBuilder sbb = new StringBuilder( size );
098 for ( int i = 0; i < objects.length; i++ ) {
099 sbb.append( objects[i] );
100 }
101 return sbb.toString();
102 }
103
104 /**
105 * replaces occurences of a string fragment within a string by a new string.
106 *
107 * @param target
108 * is the original string
109 * @param from
110 * is the string to be replaced
111 * @param to
112 * is the string which will used to replace
113 * @param all
114 * if it's true all occurences of the string to be replaced will be replaced. else
115 * only the first occurence will be replaced.
116 * @return the changed target string
117 */
118 public static String replace( String target, String from, String to, boolean all ) {
119
120 StringBuffer buffer = new StringBuffer( target.length() );
121 int copyFrom = 0;
122 char[] targetChars = null;
123 int lf = from.length();
124 int start = -1;
125 do {
126 start = target.indexOf( from );
127 copyFrom = 0;
128 if ( start == -1 ) {
129 return target;
130 }
131
132 targetChars = target.toCharArray();
133 while ( start != -1 ) {
134 buffer.append( targetChars, copyFrom, start - copyFrom );
135 buffer.append( to );
136 copyFrom = start + lf;
137 start = target.indexOf( from, copyFrom );
138 if ( !all ) {
139 start = -1;
140 }
141 }
142 buffer.append( targetChars, copyFrom, targetChars.length - copyFrom );
143 target = buffer.toString();
144 buffer.delete( 0, buffer.length() );
145 } while ( target.indexOf( from ) > -1 && to.indexOf( from ) < 0 );
146
147 return target;
148 }
149
150 /**
151 * parse a string and return its tokens as array
152 *
153 * @param s
154 * string to parse
155 * @param delimiter
156 * delimiter that marks the end of a token
157 * @param deleteDoubles
158 * if it's true all string that are already within the resulting array will be
159 * deleted, so that there will only be one copy of them.
160 * @return
161 */
162 public static String[] toArray( String s, String delimiter, boolean deleteDoubles ) {
163 if ( s == null || s.equals( "" ) ) {
164 return new String[0];
165 }
166
167 StringTokenizer st = new StringTokenizer( s, delimiter );
168 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
169
170 if ( st.countTokens() > 0 ) {
171 for ( int i = 0; st.hasMoreTokens(); i++ ) {
172 String t = st.nextToken();
173 if ( ( t != null ) && ( t.length() > 0 ) ) {
174 vec.add( t.trim() );
175 }
176 }
177 } else {
178 vec.add( s );
179 }
180
181 String[] kw = vec.toArray( new String[vec.size()] );
182 if ( deleteDoubles ) {
183 kw = deleteDoubles( kw );
184 }
185
186 return kw;
187 }
188
189 /**
190 * parse a string and return its tokens as typed List
191 *
192 * @param s
193 * string to parse
194 * @param delimiter
195 * delimiter that marks the end of a token
196 * @param deleteDoubles
197 * if it's true all string that are already within the resulting array will be
198 * deleted, so that there will only be one copy of them.
199 * @return
200 */
201 public static List<String> toList( String s, String delimiter, boolean deleteDoubles ) {
202 if ( s == null || s.equals( "" ) ) {
203 return new ArrayList<String>();
204 }
205
206 StringTokenizer st = new StringTokenizer( s, delimiter );
207 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
208 for ( int i = 0; st.hasMoreTokens(); i++ ) {
209 String t = st.nextToken();
210 if ( ( t != null ) && ( t.length() > 0 ) ) {
211 if ( deleteDoubles ) {
212 if ( !vec.contains( t.trim() ) ) {
213 vec.add( t.trim() );
214 }
215 } else {
216 vec.add( t.trim() );
217 }
218 }
219 }
220
221 return vec;
222 }
223
224 /**
225 * transforms a string array to one string. the array fields are seperated by the submitted
226 * delimiter:
227 *
228 * @param s
229 * stringarray to transform
230 * @param delimiter
231 */
232 public static String arrayToString( String[] s, char delimiter ) {
233 StringBuffer res = new StringBuffer( s.length * 20 );
234
235 for ( int i = 0; i < s.length; i++ ) {
236 res.append( s[i] );
237
238 if ( i < ( s.length - 1 ) ) {
239 res.append( delimiter );
240 }
241 }
242
243 return res.toString();
244 }
245
246 /**
247 * transforms a list to one string. the array fields are seperated by the submitted delimiter:
248 *
249 * @param s
250 * stringarray to transform
251 * @param delimiter
252 */
253 public static String listToString( List s, char delimiter ) {
254 StringBuffer res = new StringBuffer( s.size() * 20 );
255
256 for ( int i = 0; i < s.size(); i++ ) {
257 res.append( s.get( i ) );
258
259 if ( i < ( s.size() - 1 ) ) {
260 res.append( delimiter );
261 }
262 }
263
264 return res.toString();
265 }
266
267 /**
268 * transforms a double array to one string. the array fields are seperated by the submitted
269 * delimiter:
270 *
271 * @param s
272 * stringarray to transform
273 * @param delimiter
274 */
275 public static String arrayToString( double[] s, char delimiter ) {
276 StringBuffer res = new StringBuffer( s.length * 20 );
277
278 for ( int i = 0; i < s.length; i++ ) {
279 res.append( Double.toString( s[i] ) );
280
281 if ( i < ( s.length - 1 ) ) {
282 res.append( delimiter );
283 }
284 }
285
286 return res.toString();
287 }
288
289 /**
290 * transforms a int array to one string. the array fields are seperated by the submitted
291 * delimiter:
292 *
293 * @param s
294 * stringarray to transform
295 * @param delimiter
296 */
297 public static String arrayToString( int[] s, char delimiter ) {
298 StringBuffer res = new StringBuffer( s.length * 20 );
299
300 for ( int i = 0; i < s.length; i++ ) {
301 res.append( Integer.toString( s[i] ) );
302
303 if ( i < ( s.length - 1 ) ) {
304 res.append( delimiter );
305 }
306 }
307
308 return res.toString();
309 }
310
311 /**
312 * clears the begin and end of a string from the strings sumitted
313 *
314 * @param s
315 * string to validate
316 * @param mark
317 * string to remove from begin and end of <code>s</code>
318 */
319 public static String validateString( String s, String mark ) {
320 if ( s == null ) {
321 return null;
322 }
323
324 if ( s.length() == 0 ) {
325 return s;
326 }
327
328 s = s.trim();
329
330 while ( s.startsWith( mark ) ) {
331 s = s.substring( mark.length(), s.length() ).trim();
332 }
333
334 while ( s.endsWith( mark ) ) {
335 s = s.substring( 0, s.length() - mark.length() ).trim();
336 }
337
338 return s;
339 }
340
341 /**
342 * deletes all double entries from the submitted array
343 */
344 public static String[] deleteDoubles( String[] s ) {
345 ArrayList<String> vec = new ArrayList<String>( s.length );
346
347 for ( int i = 0; i < s.length; i++ ) {
348 if ( !vec.contains( s[i] ) ) {
349 vec.add( s[i] );
350 }
351 }
352
353 return vec.toArray( new String[vec.size()] );
354 }
355
356 /**
357 * removes all fields from the array that equals <code>s</code>
358 *
359 * @param target
360 * array where to remove the submitted string
361 * @param s
362 * string to remove
363 */
364 public static String[] removeFromArray( String[] target, String s ) {
365 ArrayList<String> vec = new ArrayList<String>( target.length );
366
367 for ( int i = 0; i < target.length; i++ ) {
368 if ( !target[i].equals( s ) ) {
369 vec.add( target[i] );
370 }
371 }
372
373 return vec.toArray( new String[vec.size()] );
374 }
375
376 /**
377 * checks if the submitted array contains the string <code>value</code>
378 *
379 * @param target
380 * array to check if it contains <code>value</code>
381 * @param value
382 * string to check if it within the array
383 */
384 public static boolean contains( String[] target, String value ) {
385 if ( target == null || value == null ) {
386 return false;
387 }
388
389 if ( value.endsWith( "," ) ) {
390 value = value.substring( 0, value.length() - 1 );
391 }
392
393 for ( int i = 0; i < target.length; i++ ) {
394 if ( value.equalsIgnoreCase( target[i] ) ) {
395 return true;
396 }
397 }
398
399 return false;
400 }
401
402 /**
403 * convert the array of string like [(x1,y1),(x2,y2)...] into an array of double
404 * [x1,y1,x2,y2...]
405 *
406 * @param s
407 * @param delimiter
408 *
409 * @return
410 */
411 public static double[] toArrayDouble( String s, String delimiter ) {
412 if ( s == null ) {
413 return null;
414 }
415
416 if ( s.equals( "" ) ) {
417 return null;
418 }
419
420 StringTokenizer st = new StringTokenizer( s, delimiter );
421
422 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
423
424 for ( int i = 0; st.hasMoreTokens(); i++ ) {
425 String t = st.nextToken().replace( ' ', '+' );
426
427 if ( ( t != null ) && ( t.length() > 0 ) ) {
428 vec.add( t.trim().replace( ',', '.' ) );
429 }
430 }
431
432 double[] array = new double[vec.size()];
433
434 for ( int i = 0; i < vec.size(); i++ ) {
435 array[i] = Double.parseDouble( vec.get( i ) );
436 }
437
438 return array;
439 }
440
441 /**
442 * convert the array of string like [(x1,y1),(x2,y2)...] into an array of
443 * float values [x1,y1,x2,y2...]
444 *
445 * @param s
446 * @param delimiter
447 *
448 * @return
449 */
450 public static float[] toArrayFloat( String s, String delimiter ) {
451 if ( s == null ) {
452 return null;
453 }
454
455 if ( s.equals( "" ) ) {
456 return null;
457 }
458
459 StringTokenizer st = new StringTokenizer( s, delimiter );
460
461 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
462 for ( int i = 0; st.hasMoreTokens(); i++ ) {
463 String t = st.nextToken().replace( ' ', '+' );
464 if ( ( t != null ) && ( t.length() > 0 ) ) {
465 vec.add( t.trim().replace( ',', '.' ) );
466 }
467 }
468
469 float[] array = new float[vec.size()];
470
471 for ( int i = 0; i < vec.size(); i++ ) {
472 array[i] = Float.parseFloat( vec.get( i ) );
473 }
474
475 return array;
476 }
477
478 /**
479 * transforms an array of StackTraceElements into a String
480 */
481 public static String stackTraceToString( StackTraceElement[] se ) {
482
483 StringBuffer sb = new StringBuffer();
484 for ( int i = 0; i < se.length; i++ ) {
485 sb.append( se[i].getClassName() + " " );
486 sb.append( se[i].getFileName() + " " );
487 sb.append( se[i].getMethodName() + "(" );
488 sb.append( se[i].getLineNumber() + ")\n" );
489 }
490 return sb.toString();
491 }
492
493 /**
494 * gets the stacktrace array from the passed Excption and transforms it into a String
495 */
496 public static String stackTraceToString( Throwable e ) {
497
498 StackTraceElement[] se = e.getStackTrace();
499 StringBuffer sb = new StringBuffer();
500 sb.append( e.getMessage() ).append( "\n" );
501 sb.append( e.getClass().getName() ).append( "\n" );
502 for ( int i = 0; i < se.length; i++ ) {
503 sb.append( se[i].getClassName() + " " );
504 sb.append( se[i].getFileName() + " " );
505 sb.append( se[i].getMethodName() + "(" );
506 sb.append( se[i].getLineNumber() + ")\n" );
507 if ( i > 4 )
508 break;
509 }
510 return sb.toString();
511 }
512
513 /**
514 * countString count the occurrences of token into target
515 *
516 * @param target
517 * @param token
518 *
519 * @return
520 */
521 public static int countString( String target, String token ) {
522 int start = target.indexOf( token );
523 int count = 0;
524
525 while ( start != -1 ) {
526 count++;
527 start = target.indexOf( token, start + 1 );
528 }
529
530 return count;
531 }
532
533 /**
534 * Extract all the strings that begin with "start" and end with "end" and store it into an array
535 * of String
536 *
537 * @param target
538 * @param startString
539 * @param endString
540 *
541 * @return <code>null</code> if no strings were found!!
542 */
543 public static String[] extractStrings( String target, String startString, String endString ) {
544 int start = target.indexOf( startString );
545
546 if ( start == -1 ) {
547 return null;
548 }
549
550 int count = countString( target, startString );
551 String[] subString = null;
552 if ( startString.equals( endString ) ) {
553 count = count / 2;
554 subString = new String[count];
555 for ( int i = 0; i < count; i++ ) {
556 int tmp = target.indexOf( endString, start + 1 );
557 subString[i] = target.substring( start, tmp + 1 );
558 start = target.indexOf( startString, tmp + 1 );
559 }
560 } else {
561 subString = new String[count];
562 for ( int i = 0; i < count; i++ ) {
563 subString[i] = target.substring( start, target.indexOf( endString, start + 1 ) + 1 );
564 subString[i] = extractString( subString[i], startString, endString, true, true );
565 start = target.indexOf( startString, start + 1 );
566 }
567 }
568
569 return subString;
570 }
571
572 /**
573 * extract a string contained between startDel and endDel, you can remove the delimiters if set
574 * true the parameters delStart and delEnd
575 *
576 * @param target
577 * @param startDel
578 * @param endDel
579 * @param delStart
580 * @param delEnd
581 *
582 * @return
583 */
584 public static String extractString( String target, String startDel, String endDel,
585 boolean delStart, boolean delEnd ) {
586 int start = target.indexOf( startDel );
587
588 if ( start == -1 ) {
589 return null;
590 }
591
592 String s = target.substring( start, target.indexOf( endDel, start + 1 ) + 1 );
593
594 s = s.trim();
595
596 if ( delStart ) {
597 while ( s.startsWith( startDel ) ) {
598 s = s.substring( startDel.length(), s.length() ).trim();
599 }
600 }
601
602 if ( delEnd ) {
603 while ( s.endsWith( endDel ) ) {
604 s = s.substring( 0, s.length() - endDel.length() ).trim();
605 }
606 }
607
608 return s;
609 }
610
611 /**
612 * Initialize the substitution map with all normalization rules for a given locale and
613 * add this map to the static localeMap.
614 *
615 * @param locale
616 * @throws IOException
617 * @throws SAXException
618 * @throws XMLParsingException
619 */
620 private static void initMap( String locale )
621 throws IOException, SAXException, XMLParsingException {
622
623 // read normalization file
624 StringBuffer sb = new StringBuffer( 1000 );
625 InputStream is = StringTools.class.getResourceAsStream( "/normalization.xml" );
626 if ( is == null ){
627 is = StringTools.class.getResourceAsStream( "normalization.xml" );
628 }
629 BufferedReader br = new BufferedReader( new InputStreamReader( is ) );
630 String s = null;
631 while ( ( s = br.readLine() ) != null ) {
632 sb.append( s );
633 }
634 br.close();
635
636 // transform into xml fragment
637 XMLFragment xml = new XMLFragment();
638 xml.load( new StringReader( sb.toString() ),
639 StringTools.class.getResource( "normalization.xml" ).toString() ); //FIXME
640
641 // create map
642 Map<String, String> substitutionMap = new HashMap<String, String>( 20 );
643
644 // extract case attrib ( "toLower" or "toUpper" or missing ) for passed locale
645 String xpath = "Locale[@name = '" + Locale.GERMANY.getLanguage() + "']/@case";
646 String letterCase = XMLTools.getNodeAsString( xml.getRootElement(), xpath,
647 CommonNamespaces.getNamespaceContext(), null );
648 if ( letterCase != null ) {
649 substitutionMap.put( "case", letterCase );
650 }
651
652 // extract removeDoubles attrib ( "true" or "false" ) for passed locale
653 xpath = "Locale[@name = '" + Locale.GERMANY.getLanguage() + "']/@removeDoubles";
654 String removeDoubles = XMLTools.getNodeAsString( xml.getRootElement(), xpath,
655 CommonNamespaces.getNamespaceContext(),
656 null );
657 if ( removeDoubles != null && removeDoubles.length() > 0 ) {
658 substitutionMap.put( "removeDoubles", removeDoubles );
659 }
660
661 // extract rules section for passed locale
662 xpath = "Locale[@name = '" + locale + "']/Rule";
663 List list = XMLTools.getNodes( xml.getRootElement(), xpath,
664 CommonNamespaces.getNamespaceContext() );
665 if ( list != null ) {
666 for ( int i = 0; i < list.size(); i++ ) {
667 String src = XMLTools.getRequiredNodeAsString( (Node) list.get( i ),
668 "Source",
669 CommonNamespaces.getNamespaceContext() );
670 String target = XMLTools.getRequiredNodeAsString( (Node) list.get( i ),
671 "Target",
672 CommonNamespaces.getNamespaceContext() );
673 substitutionMap.put( src, target );
674 }
675 }
676
677 // init localeMap if needed
678 if ( localeMap == null ) {
679 localeMap = new HashMap<String, Map<String, String>>( 20 );
680 }
681
682 localeMap.put( locale, substitutionMap );
683 }
684
685 /**
686 * The passed string gets normalized along the rules for the given locale as they are set in
687 * the file "./normalization.xml".
688 * If such rules are specified, the following order is obeyed:
689 *
690 * <ol>
691 * <li>if the attribute "case" is set with "toLower" or "toUpper", the letters are switched
692 * to lower case or to upper case respectively.</li>
693 * <li>all rules given in the "Rule" elements are performed.</li>
694 * <li>if the attribute "removeDoubles" is set and not empty, all multi occurences of the
695 * letters given in this attribute are reduced to a single occurence.</li>
696 * </ol>
697 *
698 * @param source the String to normalize
699 * @param locale the locale language defining the rules to choose, e.g. "de"
700 * @return the normalized String
701 * @throws IOException
702 * @throws SAXException
703 * @throws XMLParsingException
704 */
705 public static String normalizeString( String source, String locale )
706 throws IOException, SAXException, XMLParsingException {
707
708 if ( localeMap == null ) {
709 localeMap = new HashMap<String, Map<String, String>>( 20 );
710 }
711 Map<String, String> substitutionMap = localeMap.get( locale );
712
713 if ( substitutionMap == null ) {
714 initMap( locale );
715 }
716 substitutionMap = localeMap.get( locale );
717
718 String output = source;
719 Set<String> keys = substitutionMap.keySet();
720
721 boolean toUpper = false;
722 boolean toLower = false;
723 boolean removeDoubles = false;
724
725 for ( String key : keys ) {
726 if ( "case".equals( key ) ) {
727 toUpper = "toUpper".equals( substitutionMap.get( key ) );
728 toLower = "toLower".equals( substitutionMap.get( key ) );
729 }
730 if ( "removeDoubles".equals( key ) && substitutionMap.get( key ).length() > 0 ) {
731 removeDoubles = true;
732 }
733 }
734
735 // first: change letters to upper / lower case
736 if ( toUpper ) {
737 output = output.toUpperCase();
738 } else if ( toLower ) {
739 output = output.toLowerCase();
740 }
741
742 // second: change string according to specified rules
743 for ( String key : keys ) {
744 if ( !"case".equals( key ) && !"removeDoubles".equals( key ) ) {
745 output = output.replaceAll( key, substitutionMap.get( key ) );
746 }
747 }
748
749 // third: remove doubles
750 if ( removeDoubles ) {
751 String doubles = substitutionMap.get( "removeDoubles" );
752 for ( int i = 0; i < doubles.length(); i++ ) {
753 String remove = "" + doubles.charAt( i ) + "+";
754 String replaceWith = "" + doubles.charAt( i );
755 output = output.replaceAll( remove, replaceWith );
756 }
757 }
758 return output;
759 }
760 }