001 //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/tags/2.1/src/org/deegree/framework/util/StringTools.java $
002 /*---------------- FILE HEADER ------------------------------------------
003
004 This file is part of deegree.
005 Copyright (C) 2001-2007 by:
006 EXSE, Department of Geography, University of Bonn
007 http://www.giub.uni-bonn.de/deegree/
008 lat/lon GmbH
009 http://www.lat-lon.de
010
011 This library is free software; you can redistribute it and/or
012 modify it under the terms of the GNU Lesser General Public
013 License as published by the Free Software Foundation; either
014 version 2.1 of the License, or (at your option) any later version.
015
016 This library is distributed in the hope that it will be useful,
017 but WITHOUT ANY WARRANTY; without even the implied warranty of
018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 Lesser General Public License for more details.
020
021 You should have received a copy of the GNU Lesser General Public
022 License along with this library; if not, write to the Free Software
023 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024
025 Contact:
026
027 Andreas Poth
028 lat/lon GmbH
029 Aennchenstr. 19
030 53115 Bonn
031 Germany
032 E-Mail: poth@lat-lon.de
033
034 Prof. Dr. Klaus Greve
035 Department of Geography
036 University of Bonn
037 Meckenheimer Allee 166
038 53115 Bonn
039 Germany
040 E-Mail: greve@giub.uni-bonn.de
041
042
043 ---------------------------------------------------------------------------*/
044 package org.deegree.framework.util;
045
046 import java.io.BufferedReader;
047 import java.io.IOException;
048 import java.io.InputStream;
049 import java.io.InputStreamReader;
050 import java.io.StringReader;
051 import java.util.ArrayList;
052 import java.util.HashMap;
053 import java.util.List;
054 import java.util.Locale;
055 import java.util.Map;
056 import java.util.Set;
057 import java.util.StringTokenizer;
058
059 import org.deegree.framework.xml.XMLFragment;
060 import org.deegree.framework.xml.XMLParsingException;
061 import org.deegree.framework.xml.XMLTools;
062 import org.deegree.ogcbase.CommonNamespaces;
063 import org.w3c.dom.Node;
064 import org.xml.sax.SAXException;
065
066 /**
067 * This is a collection of methods that extend the functionality of the standard Java String
068 * class.
069 *
070 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
071 * @author last edited by: $Author: apoth $
072 *
073 * @version $Revision: 7730 $, $Date: 2007-07-05 21:47:32 +0200 (Do, 05 Jul 2007) $
074 */
075 public class StringTools {
076
077 /**
078 * This map is used for methods normalizeString() and initMap().
079 *
080 * key = locale language, e.g. "de"
081 * value = map of substitution rules for this locale
082 */
083 private static Map<String, Map<String, String>> localeMap;
084
085 /**
086 * concatenates an array of strings using a
087 *
088 * @see StringBuffer
089 *
090 * @param size
091 * estimated size of the target string
092 * @param objects
093 * toString() will be called for each object to append it to the result string
094 * @return
095 */
096 public static String concat( int size, Object... objects ) {
097 StringBuilder sbb = new StringBuilder( size );
098 for ( int i = 0; i < objects.length; i++ ) {
099 sbb.append( objects[i] );
100 }
101 return sbb.toString();
102 }
103
104 /**
105 * replaces occurences of a string fragment within a string by a new string.
106 *
107 * @param target
108 * is the original string
109 * @param from
110 * is the string to be replaced
111 * @param to
112 * is the string which will used to replace
113 * @param all
114 * if it's true all occurences of the string to be replaced will be replaced. else
115 * only the first occurence will be replaced.
116 */
117 public static String replace( String target, String from, String to, boolean all ) {
118
119 StringBuffer buffer = new StringBuffer( target.length() );
120 int copyFrom = 0;
121 char[] targetChars = null;
122 int lf = from.length();
123 int start = -1;
124 do {
125 start = target.indexOf( from );
126 copyFrom = 0;
127 if ( start == -1 ) {
128 return target;
129 }
130
131 targetChars = target.toCharArray();
132 while ( start != -1 ) {
133 buffer.append( targetChars, copyFrom, start - copyFrom );
134 buffer.append( to );
135 copyFrom = start + lf;
136 start = target.indexOf( from, copyFrom );
137 if ( !all ) {
138 start = -1;
139 }
140 }
141 buffer.append( targetChars, copyFrom, targetChars.length - copyFrom );
142 target = buffer.toString();
143 buffer.delete( 0, buffer.length() );
144 } while ( target.indexOf( from ) > -1 && to.indexOf( from ) < 0 );
145
146 return target;
147 }
148
149 /**
150 * parse a string and return its tokens as array
151 *
152 * @param s
153 * string to parse
154 * @param delimiter
155 * delimiter that marks the end of a token
156 * @param deleteDoubles
157 * if it's true all string that are already within the resulting array will be
158 * deleted, so that there will only be one copy of them.
159 */
160 public static String[] toArray( String s, String delimiter, boolean deleteDoubles ) {
161 if ( s == null || s.equals( "" ) ) {
162 return new String[0];
163 }
164
165 StringTokenizer st = new StringTokenizer( s, delimiter );
166 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
167
168 if ( st.countTokens() > 0 ) {
169 for ( int i = 0; st.hasMoreTokens(); i++ ) {
170 String t = st.nextToken();
171 if ( ( t != null ) && ( t.length() > 0 ) ) {
172 vec.add( t.trim() );
173 }
174 }
175 } else {
176 vec.add( s );
177 }
178
179 String[] kw = vec.toArray( new String[vec.size()] );
180 if ( deleteDoubles ) {
181 kw = deleteDoubles( kw );
182 }
183
184 return kw;
185 }
186
187 /**
188 * parse a string and return its tokens as typed List
189 *
190 * @param s
191 * string to parse
192 * @param delimiter
193 * delimiter that marks the end of a token
194 * @param deleteDoubles
195 * if it's true all string that are already within the resulting array will be
196 * deleted, so that there will only be one copy of them.
197 * @return
198 */
199 public static List<String> toList( String s, String delimiter, boolean deleteDoubles ) {
200 if ( s == null || s.equals( "" ) ) {
201 return new ArrayList<String>();
202 }
203
204 StringTokenizer st = new StringTokenizer( s, delimiter );
205 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
206 for ( int i = 0; st.hasMoreTokens(); i++ ) {
207 String t = st.nextToken();
208 if ( ( t != null ) && ( t.length() > 0 ) ) {
209 if ( deleteDoubles ) {
210 if ( !vec.contains( t.trim() ) ) {
211 vec.add( t.trim() );
212 }
213 } else {
214 vec.add( t.trim() );
215 }
216 }
217 }
218
219 return vec;
220 }
221
222 /**
223 * transforms a string array to one string. the array fields are seperated by the submitted
224 * delimiter:
225 *
226 * @param s
227 * stringarray to transform
228 * @param delimiter
229 */
230 public static String arrayToString( String[] s, char delimiter ) {
231 StringBuffer res = new StringBuffer( s.length * 20 );
232
233 for ( int i = 0; i < s.length; i++ ) {
234 res.append( s[i] );
235
236 if ( i < ( s.length - 1 ) ) {
237 res.append( delimiter );
238 }
239 }
240
241 return res.toString();
242 }
243
244 /**
245 * transforms a list to one string. the array fields are seperated by the submitted delimiter:
246 *
247 * @param s
248 * stringarray to transform
249 * @param delimiter
250 */
251 public static String listToString( List s, char delimiter ) {
252 StringBuffer res = new StringBuffer( s.size() * 20 );
253
254 for ( int i = 0; i < s.size(); i++ ) {
255 res.append( s.get( i ) );
256
257 if ( i < ( s.size() - 1 ) ) {
258 res.append( delimiter );
259 }
260 }
261
262 return res.toString();
263 }
264
265 /**
266 * transforms a double array to one string. the array fields are seperated by the submitted
267 * delimiter:
268 *
269 * @param s
270 * stringarray to transform
271 * @param delimiter
272 */
273 public static String arrayToString( double[] s, char delimiter ) {
274 StringBuffer res = new StringBuffer( s.length * 20 );
275
276 for ( int i = 0; i < s.length; i++ ) {
277 res.append( Double.toString( s[i] ) );
278
279 if ( i < ( s.length - 1 ) ) {
280 res.append( delimiter );
281 }
282 }
283
284 return res.toString();
285 }
286
287 /**
288 * transforms a int array to one string. the array fields are seperated by the submitted
289 * delimiter:
290 *
291 * @param s
292 * stringarray to transform
293 * @param delimiter
294 */
295 public static String arrayToString( int[] s, char delimiter ) {
296 StringBuffer res = new StringBuffer( s.length * 20 );
297
298 for ( int i = 0; i < s.length; i++ ) {
299 res.append( Integer.toString( s[i] ) );
300
301 if ( i < ( s.length - 1 ) ) {
302 res.append( delimiter );
303 }
304 }
305
306 return res.toString();
307 }
308
309 /**
310 * clears the begin and end of a string from the strings sumitted
311 *
312 * @param s
313 * string to validate
314 * @param mark
315 * string to remove from begin and end of <code>s</code>
316 */
317 public static String validateString( String s, String mark ) {
318 if ( s == null ) {
319 return null;
320 }
321
322 if ( s.length() == 0 ) {
323 return s;
324 }
325
326 s = s.trim();
327
328 while ( s.startsWith( mark ) ) {
329 s = s.substring( mark.length(), s.length() ).trim();
330 }
331
332 while ( s.endsWith( mark ) ) {
333 s = s.substring( 0, s.length() - mark.length() ).trim();
334 }
335
336 return s;
337 }
338
339 /**
340 * deletes all double entries from the submitted array
341 */
342 public static String[] deleteDoubles( String[] s ) {
343 ArrayList<String> vec = new ArrayList<String>( s.length );
344
345 for ( int i = 0; i < s.length; i++ ) {
346 if ( !vec.contains( s[i] ) ) {
347 vec.add( s[i] );
348 }
349 }
350
351 return vec.toArray( new String[vec.size()] );
352 }
353
354 /**
355 * removes all fields from the array that equals <code>s</code>
356 *
357 * @param target
358 * array where to remove the submitted string
359 * @param s
360 * string to remove
361 */
362 public static String[] removeFromArray( String[] target, String s ) {
363 ArrayList<String> vec = new ArrayList<String>( target.length );
364
365 for ( int i = 0; i < target.length; i++ ) {
366 if ( !target[i].equals( s ) ) {
367 vec.add( target[i] );
368 }
369 }
370
371 return vec.toArray( new String[vec.size()] );
372 }
373
374 /**
375 * checks if the submitted array contains the string <code>value</code>
376 *
377 * @param target
378 * array to check if it contains <code>value</code>
379 * @param value
380 * string to check if it within the array
381 */
382 public static boolean contains( String[] target, String value ) {
383 if ( target == null || value == null ) {
384 return false;
385 }
386
387 if ( value.endsWith( "," ) ) {
388 value = value.substring( 0, value.length() - 1 );
389 }
390
391 for ( int i = 0; i < target.length; i++ ) {
392 if ( value.equalsIgnoreCase( target[i] ) ) {
393 return true;
394 }
395 }
396
397 return false;
398 }
399
400 /**
401 * convert the array of string like [(x1,y1),(x2,y2)...] into an array of double
402 * [x1,y1,x2,y2...]
403 *
404 * @param s
405 * @param delimiter
406 *
407 * @return
408 */
409 public static double[] toArrayDouble( String s, String delimiter ) {
410 if ( s == null ) {
411 return null;
412 }
413
414 if ( s.equals( "" ) ) {
415 return null;
416 }
417
418 StringTokenizer st = new StringTokenizer( s, delimiter );
419
420 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
421
422 for ( int i = 0; st.hasMoreTokens(); i++ ) {
423 String t = st.nextToken().replace( ' ', '+' );
424
425 if ( ( t != null ) && ( t.length() > 0 ) ) {
426 vec.add( t.trim().replace( ',', '.' ) );
427 }
428 }
429
430 double[] array = new double[vec.size()];
431
432 for ( int i = 0; i < vec.size(); i++ ) {
433 array[i] = Double.parseDouble( vec.get( i ) );
434 }
435
436 return array;
437 }
438
439 /**
440 * convert the array of string like [(x1,y1),(x2,y2)...] into an array of
441 * float values [x1,y1,x2,y2...]
442 *
443 * @param s
444 * @param delimiter
445 *
446 * @return
447 */
448 public static float[] toArrayFloat( String s, String delimiter ) {
449 if ( s == null ) {
450 return null;
451 }
452
453 if ( s.equals( "" ) ) {
454 return null;
455 }
456
457 StringTokenizer st = new StringTokenizer( s, delimiter );
458
459 ArrayList<String> vec = new ArrayList<String>( st.countTokens() );
460 for ( int i = 0; st.hasMoreTokens(); i++ ) {
461 String t = st.nextToken().replace( ' ', '+' );
462 if ( ( t != null ) && ( t.length() > 0 ) ) {
463 vec.add( t.trim().replace( ',', '.' ) );
464 }
465 }
466
467 float[] array = new float[vec.size()];
468
469 for ( int i = 0; i < vec.size(); i++ ) {
470 array[i] = Float.parseFloat( vec.get( i ) );
471 }
472
473 return array;
474 }
475
476 /**
477 * transforms an array of StackTraceElements into a String
478 */
479 public static String stackTraceToString( StackTraceElement[] se ) {
480
481 StringBuffer sb = new StringBuffer();
482 for ( int i = 0; i < se.length; i++ ) {
483 sb.append( se[i].getClassName() + " " );
484 sb.append( se[i].getFileName() + " " );
485 sb.append( se[i].getMethodName() + "(" );
486 sb.append( se[i].getLineNumber() + ")\n" );
487 }
488 return sb.toString();
489 }
490
491 /**
492 * gets the stacktrace array from the passed Excption and transforms it into a String
493 */
494 public static String stackTraceToString( Throwable e ) {
495
496 StackTraceElement[] se = e.getStackTrace();
497 StringBuffer sb = new StringBuffer();
498 sb.append( e.getMessage() ).append( "\n" );
499 sb.append( e.getClass().getName() ).append( "\n" );
500 for ( int i = 0; i < se.length; i++ ) {
501 sb.append( se[i].getClassName() + " " );
502 sb.append( se[i].getFileName() + " " );
503 sb.append( se[i].getMethodName() + "(" );
504 sb.append( se[i].getLineNumber() + ")\n" );
505 if ( i > 4 )
506 break;
507 }
508 return sb.toString();
509 }
510
511 /**
512 * countString count the occurrences of token into target
513 *
514 * @param target
515 * @param token
516 *
517 * @return
518 */
519 public static int countString( String target, String token ) {
520 int start = target.indexOf( token );
521 int count = 0;
522
523 while ( start != -1 ) {
524 count++;
525 start = target.indexOf( token, start + 1 );
526 }
527
528 return count;
529 }
530
531 /**
532 * Extract all the strings that begin with "start" and end with "end" and store it into an array
533 * of String
534 *
535 * @param target
536 * @param startString
537 * @param endString
538 *
539 * @return <code>null</code> if no strings were found!!
540 */
541 public static String[] extractStrings( String target, String startString, String endString ) {
542 int start = target.indexOf( startString );
543
544 if ( start == -1 ) {
545 return null;
546 }
547
548 int count = countString( target, startString );
549 String[] subString = null;
550 if ( startString.equals( endString ) ) {
551 count = count / 2;
552 subString = new String[count];
553 for ( int i = 0; i < count; i++ ) {
554 int tmp = target.indexOf( endString, start + 1 );
555 subString[i] = target.substring( start, tmp + 1 );
556 start = target.indexOf( startString, tmp + 1 );
557 }
558 } else {
559 subString = new String[count];
560 for ( int i = 0; i < count; i++ ) {
561 subString[i] = target.substring( start, target.indexOf( endString, start + 1 ) + 1 );
562 subString[i] = extractString( subString[i], startString, endString, true, true );
563 start = target.indexOf( startString, start + 1 );
564 }
565 }
566
567 return subString;
568 }
569
570 /**
571 * extract a string contained between startDel and endDel, you can remove the delimiters if set
572 * true the parameters delStart and delEnd
573 *
574 * @param target
575 * @param startDel
576 * @param endDel
577 * @param delStart
578 * @param delEnd
579 *
580 * @return
581 */
582 public static String extractString( String target, String startDel, String endDel,
583 boolean delStart, boolean delEnd ) {
584 int start = target.indexOf( startDel );
585
586 if ( start == -1 ) {
587 return null;
588 }
589
590 String s = target.substring( start, target.indexOf( endDel, start + 1 ) + 1 );
591
592 s = s.trim();
593
594 if ( delStart ) {
595 while ( s.startsWith( startDel ) ) {
596 s = s.substring( startDel.length(), s.length() ).trim();
597 }
598 }
599
600 if ( delEnd ) {
601 while ( s.endsWith( endDel ) ) {
602 s = s.substring( 0, s.length() - endDel.length() ).trim();
603 }
604 }
605
606 return s;
607 }
608
609 /**
610 * Initialize the substitution map with all normalization rules for a given locale and
611 * add this map to the static localeMap.
612 *
613 * @param locale
614 * @throws IOException
615 * @throws SAXException
616 * @throws XMLParsingException
617 */
618 private static void initMap( String locale )
619 throws IOException, SAXException, XMLParsingException {
620
621 // read normalization file
622 StringBuffer sb = new StringBuffer( 1000 );
623 InputStream is = StringTools.class.getResourceAsStream( "/normalization.xml" );
624 if ( is == null ){
625 is = StringTools.class.getResourceAsStream( "normalization.xml" );
626 }
627 BufferedReader br = new BufferedReader( new InputStreamReader( is ) );
628 String s = null;
629 while ( ( s = br.readLine() ) != null ) {
630 sb.append( s );
631 }
632 br.close();
633
634 // transform into xml fragment
635 XMLFragment xml = new XMLFragment();
636 xml.load( new StringReader( sb.toString() ),
637 StringTools.class.getResource( "normalization.xml" ).toString() ); //FIXME
638
639 // create map
640 Map<String, String> substitutionMap = new HashMap<String, String>( 20 );
641
642 // extract case attrib ( "toLower" or "toUpper" or missing ) for passed locale
643 String xpath = "Locale[@name = '" + Locale.GERMANY.getLanguage() + "']/@case";
644 String letterCase = XMLTools.getNodeAsString( xml.getRootElement(), xpath,
645 CommonNamespaces.getNamespaceContext(), null );
646 if ( letterCase != null ) {
647 substitutionMap.put( "case", letterCase );
648 }
649
650 // extract removeDoubles attrib ( "true" or "false" ) for passed locale
651 xpath = "Locale[@name = '" + Locale.GERMANY.getLanguage() + "']/@removeDoubles";
652 String removeDoubles = XMLTools.getNodeAsString( xml.getRootElement(), xpath,
653 CommonNamespaces.getNamespaceContext(),
654 null );
655 if ( removeDoubles != null && removeDoubles.length() > 0 ) {
656 substitutionMap.put( "removeDoubles", removeDoubles );
657 }
658
659 // extract rules section for passed locale
660 xpath = "Locale[@name = '" + locale + "']/Rule";
661 List list = XMLTools.getNodes( xml.getRootElement(), xpath,
662 CommonNamespaces.getNamespaceContext() );
663 if ( list != null ) {
664 for ( int i = 0; i < list.size(); i++ ) {
665 String src = XMLTools.getRequiredNodeAsString( (Node) list.get( i ),
666 "Source",
667 CommonNamespaces.getNamespaceContext() );
668 String target = XMLTools.getRequiredNodeAsString( (Node) list.get( i ),
669 "Target",
670 CommonNamespaces.getNamespaceContext() );
671 substitutionMap.put( src, target );
672 }
673 }
674
675 // init localeMap if needed
676 if ( localeMap == null ) {
677 localeMap = new HashMap<String, Map<String, String>>( 20 );
678 }
679
680 localeMap.put( locale, substitutionMap );
681 }
682
683 /**
684 * The passed string gets normalized along the rules for the given locale as they are set in
685 * the file "./normalization.xml".
686 * If such rules are specified, the following order is obeyed:
687 *
688 * <ol>
689 * <li>if the attribute "case" is set with "toLower" or "toUpper", the letters are switched
690 * to lower case or to upper case respectively.</li>
691 * <li>all rules given in the "Rule" elements are performed.</li>
692 * <li>if the attribute "removeDoubles" is set and not empty, all multi occurences of the
693 * letters given in this attribute are reduced to a single occurence.</li>
694 * </ol>
695 *
696 * @param source the String to normalize
697 * @param locale the locale language defining the rules to choose, e.g. "de"
698 * @return the normalized String
699 * @throws IOException
700 * @throws SAXException
701 * @throws XMLParsingException
702 */
703 public static String normalizeString( String source, String locale )
704 throws IOException, SAXException, XMLParsingException {
705
706 if ( localeMap == null ) {
707 localeMap = new HashMap<String, Map<String, String>>( 20 );
708 }
709 Map<String, String> substitutionMap = localeMap.get( locale );
710
711 if ( substitutionMap == null ) {
712 initMap( locale );
713 }
714 substitutionMap = localeMap.get( locale );
715
716 String output = source;
717 Set<String> keys = substitutionMap.keySet();
718
719 boolean toUpper = false;
720 boolean toLower = false;
721 boolean removeDoubles = false;
722
723 for ( String key : keys ) {
724 if ( "case".equals( key ) ) {
725 toUpper = "toUpper".equals( substitutionMap.get( key ) );
726 toLower = "toLower".equals( substitutionMap.get( key ) );
727 }
728 if ( "removeDoubles".equals( key ) && substitutionMap.get( key ).length() > 0 ) {
729 removeDoubles = true;
730 }
731 }
732
733 // first: change letters to upper / lower case
734 if ( toUpper ) {
735 output = output.toUpperCase();
736 } else if ( toLower ) {
737 output = output.toLowerCase();
738 }
739
740 // second: change string according to specified rules
741 for ( String key : keys ) {
742 if ( !"case".equals( key ) && !"removeDoubles".equals( key ) ) {
743 output = output.replaceAll( key, substitutionMap.get( key ) );
744 }
745 }
746
747 // third: remove doubles
748 if ( removeDoubles ) {
749 String doubles = substitutionMap.get( "removeDoubles" );
750 for ( int i = 0; i < doubles.length(); i++ ) {
751 String remove = "" + doubles.charAt( i ) + "+";
752 String replaceWith = "" + doubles.charAt( i );
753 output = output.replaceAll( remove, replaceWith );
754 }
755 }
756 return output;
757 }
758 }