001 //$HeadURL: svn+ssh://jwilden@svn.wald.intevation.org/deegree/base/branches/2.5_testing/src/org/deegree/io/csv/CSVReader.java $ 002 /*---------------------------------------------------------------------------- 003 This file is part of deegree, http://deegree.org/ 004 Copyright (C) 2001-2009 by: 005 Department of Geography, University of Bonn 006 and 007 lat/lon GmbH 008 009 This library is free software; you can redistribute it and/or modify it under 010 the terms of the GNU Lesser General Public License as published by the Free 011 Software Foundation; either version 2.1 of the License, or (at your option) 012 any later version. 013 This library is distributed in the hope that it will be useful, but WITHOUT 014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 015 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 016 details. 017 You should have received a copy of the GNU Lesser General Public License 018 along with this library; if not, write to the Free Software Foundation, Inc., 019 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 020 021 Contact information: 022 023 lat/lon GmbH 024 Aennchenstr. 19, 53177 Bonn 025 Germany 026 http://lat-lon.de/ 027 028 Department of Geography, University of Bonn 029 Prof. Dr. Klaus Greve 030 Postfach 1147, 53001 Bonn 031 Germany 032 http://www.geographie.uni-bonn.de/deegree/ 033 034 e-mail: info@deegree.org 035 ----------------------------------------------------------------------------*/ 036 037 package org.deegree.io.csv; 038 039 import static java.io.StreamTokenizer.TT_EOF; 040 import static java.lang.Double.parseDouble; 041 import static java.util.Collections.unmodifiableList; 042 import static org.deegree.datatypes.Types.VARCHAR; 043 import static org.deegree.framework.log.LoggerFactory.getLogger; 044 import static org.deegree.io.mapinfoapi.MapInfoReader.whitespaceChars; 045 import static org.deegree.io.mapinfoapi.MapInfoReader.wordChars; 046 import static org.deegree.model.feature.FeatureFactory.createFeature; 047 import static org.deegree.model.feature.FeatureFactory.createFeatureCollection; 048 import static org.deegree.model.feature.FeatureFactory.createFeatureProperty; 049 import static org.deegree.model.feature.FeatureFactory.createFeatureType; 050 import static org.deegree.model.feature.FeatureFactory.createGeometryPropertyType; 051 import static org.deegree.model.feature.FeatureFactory.createSimplePropertyType; 052 import static org.deegree.model.spatialschema.GeometryFactory.createPoint; 053 import static org.deegree.model.spatialschema.WKTAdapter.wrap; 054 055 import java.io.BufferedReader; 056 import java.io.File; 057 import java.io.FileReader; 058 import java.io.IOException; 059 import java.io.StreamTokenizer; 060 import java.io.StringReader; 061 import java.net.URI; 062 import java.net.URISyntaxException; 063 import java.util.ArrayList; 064 import java.util.LinkedList; 065 import java.util.List; 066 067 import org.deegree.datatypes.QualifiedName; 068 import org.deegree.framework.log.ILogger; 069 import org.deegree.model.feature.FeatureCollection; 070 import org.deegree.model.feature.FeatureProperty; 071 import org.deegree.model.feature.schema.FeatureType; 072 import org.deegree.model.feature.schema.PropertyType; 073 import org.deegree.model.spatialschema.Geometry; 074 import org.deegree.model.spatialschema.GeometryException; 075 076 /** 077 * <code>CSVReader</code> 078 * 079 * @author <a href="mailto:schmitz@lat-lon.de">Andreas Schmitz</a> 080 * @author last edited by: $Author: mschneider $ 081 * 082 * @version $Revision: 18195 $, $Date: 2009-06-18 17:55:39 +0200 (Do, 18 Jun 2009) $ 083 */ 084 public class CSVReader { 085 086 private static final ILogger LOG = getLogger( CSVReader.class ); 087 088 private File fileName; 089 090 private int xcol = 0, ycol = 1, wkt = -1; 091 092 private static URI APPNS; 093 094 private List<String[]> header; 095 096 private boolean ignoreFirstLine, parseGeometryProperty = true; 097 098 static { 099 try { 100 APPNS = new URI( "http://www.deegree.org/app" ); 101 } catch ( URISyntaxException e ) { 102 // yes, cannot happen 103 } 104 } 105 106 /** 107 * @param name 108 * @param ignoreFirstLine 109 * @throws IOException 110 */ 111 public CSVReader( String name, boolean ignoreFirstLine ) throws IOException { 112 this.ignoreFirstLine = ignoreFirstLine; 113 114 fileName = new File( name ).getAbsoluteFile(); 115 116 header = new ArrayList<String[]>( 3 ); 117 118 BufferedReader in = new BufferedReader( new FileReader( name ) ); 119 String str = in.readLine(); 120 char separat = determineSeparator( str ); 121 do { 122 List<String> lst = parseLine( str, separat ); 123 header.add( lst.toArray( new String[lst.size()] ) ); 124 } while ( ( ( str = in.readLine() ) != null ) && header.size() < 3 ); 125 in.close(); 126 } 127 128 /** 129 * @return max. the first three lines of the file (if there are three) 130 */ 131 public List<String[]> getHeader() { 132 return unmodifiableList( header ); 133 } 134 135 /** 136 * By default, a geometry property will be parsed. Set this to false to get "simple property only" features. 137 * 138 * @param parseGeometryProperty 139 */ 140 public void setParseGeometryProperty( boolean parseGeometryProperty ) { 141 this.parseGeometryProperty = parseGeometryProperty; 142 } 143 144 private static char determineSeparator( String s ) { 145 // determine most likely separator 146 int ccount = countChars( s, ',' ); 147 int scount = countChars( s, ';' ); 148 int tcount = countChars( s, '\t' ); 149 if ( ccount >= scount && ccount >= tcount ) { 150 return ','; 151 } 152 if ( tcount >= ccount && tcount >= scount ) { 153 return '\t'; 154 } 155 if ( scount >= ccount && scount >= tcount ) { 156 return ';'; 157 } 158 return ','; 159 } 160 161 private static List<String> parseLine( String line, char separator ) 162 throws IOException { 163 String seps = ",;\t"; 164 for ( int i = 0; i < seps.length(); ++i ) { 165 if ( line.startsWith( "" + seps.charAt( i ) ) ) { 166 line = "\"\"" + line; 167 } 168 String dseps = "" + seps.charAt( i ) + seps.charAt( i ); 169 while ( line.indexOf( dseps ) != -1 ) { 170 line = line.replace( dseps, seps.charAt( i ) + "\"\"" + seps.charAt( i ) ); 171 } 172 } 173 StreamTokenizer tok = getCSVFromStringTokenizer( line, separator ); 174 175 LinkedList<String> list = new LinkedList<String>(); 176 177 tok.nextToken(); 178 if ( tok.ttype == TT_EOF ) { 179 return list; 180 } 181 while ( tok.ttype != TT_EOF ) { 182 list.add( tok.sval ); 183 tok.nextToken(); 184 } 185 186 return list; 187 } 188 189 /** 190 * Also sets wkt to -1. 191 * 192 * @param x 193 * @param y 194 */ 195 public void setPointColumns( int x, int y ) { 196 xcol = x; 197 ycol = y; 198 wkt = -1; 199 } 200 201 /** 202 * @param wkt 203 * if -1, x/y will be used instead 204 */ 205 public void setWKTColumn( int wkt ) { 206 this.wkt = wkt; 207 } 208 209 /** 210 * @param input 211 * @param separator 212 * @return a tokenizer with a stringreader as data input 213 */ 214 public static StreamTokenizer getCSVFromStringTokenizer( String input, char separator ) { 215 StreamTokenizer tok = new StreamTokenizer( new StringReader( input ) ); 216 217 tok.resetSyntax(); 218 tok.eolIsSignificant( true ); 219 tok.lowerCaseMode( true ); 220 tok.slashSlashComments( false ); 221 tok.slashStarComments( false ); 222 tok.wordChars( 'a', 'z' ); 223 tok.wordChars( 'A', 'Z' ); 224 tok.wordChars( '\u00a0', '\u00ff' ); 225 tok.wordChars( '0', '9' ); 226 wordChars( tok, ',', '\t', ';' ); 227 wordChars( tok, '.', '-', '_', ' ', '+', '/', '\\', '(', ')', '^' ); 228 tok.quoteChar( '"' ); 229 whitespaceChars( tok, '\n', '\r', '\f' ); 230 231 // reset separator 232 whitespaceChars( tok, separator ); 233 234 return tok; 235 } 236 237 private static int countChars( String s, char c ) { 238 int count = 0; 239 for ( int i = 0; i < s.length(); ++i ) { 240 if ( s.charAt( i ) == c ) { 241 ++count; 242 } 243 } 244 return count; 245 } 246 247 /** 248 * @return a new feature collection 249 * @throws IOException 250 */ 251 public FeatureCollection parseFeatureCollection() 252 throws IOException { 253 FeatureCollection fc = createFeatureCollection( "uniquemy_", 512 ); 254 QualifiedName geomName = new QualifiedName( "app:geometry", APPNS ); 255 QualifiedName featureName = new QualifiedName( "app:feature", APPNS ); 256 257 int counter = 0; 258 259 BufferedReader in = new BufferedReader( new FileReader( fileName ) ); 260 String str = in.readLine(); 261 List<String> colNames = null; 262 263 char separator = determineSeparator( str ); 264 if ( ignoreFirstLine ) { 265 colNames = parseLine( str, separator ); 266 str = in.readLine(); 267 } 268 outer: do { 269 LOG.logDebug( "Trying to parse line ", str ); 270 List<String> vals = parseLine( str, separator ); 271 272 double x = 0, y = 0; 273 Geometry wktGeom = null; 274 LinkedList<FeatureProperty> fps = new LinkedList<FeatureProperty>(); 275 LinkedList<PropertyType> fpt = new LinkedList<PropertyType>(); 276 277 for ( int i = 0; i < vals.size(); ++i ) { 278 279 if ( parseGeometryProperty && wkt == -1 && i == xcol ) { 280 try { 281 x = parseDouble( vals.get( i ) ); 282 } catch ( NumberFormatException nfe ) { 283 // puh, CSV is an easy format? I think not... 284 try { 285 x = parseDouble( vals.get( i ).replace( ",", "." ) ); 286 } catch ( NumberFormatException nfe2 ) { 287 LOG.logWarning( "Skipping line " + str ); 288 continue outer; 289 } 290 } 291 continue; 292 } 293 if ( parseGeometryProperty && wkt == -1 && i == ycol ) { 294 if ( vals.get( i ).equals( "" ) ) { 295 y = 0; // this seems to be a sensible (Java-like) default 296 } else { 297 try { 298 y = parseDouble( vals.get( i ) ); 299 } catch ( NumberFormatException nfe ) { 300 // puh, CSV is an easy format? I think not... 301 try { 302 y = parseDouble( vals.get( i ).replace( ",", "." ) ); 303 } catch ( NumberFormatException nfe2 ) { 304 LOG.logWarning( "Skipping line " + str ); 305 continue outer; 306 } 307 } 308 } 309 continue; 310 } 311 if ( parseGeometryProperty && wkt != -1 && i == wkt ) { 312 try { 313 wktGeom = wrap( vals.get( i ), null ); 314 } catch ( GeometryException e ) { 315 LOG.logError( "Invalid WKT geometry", e ); 316 } 317 if ( wktGeom == null ) { 318 LOG.logError( "Could not parse WKT geometry: " + vals.get( i ) ); 319 } 320 continue; 321 } 322 323 String n; 324 if ( ignoreFirstLine ) { 325 String coln = colNames.get( i ); 326 n = "app:" + ( coln.trim().equals( "" ) ? "property" + i : coln ); 327 } else { 328 n = "app:property" + i; 329 } 330 n = n.replace( ' ', '_' ); 331 QualifiedName name = new QualifiedName( n, APPNS ); 332 fps.add( createFeatureProperty( name, vals.get( i ) ) ); 333 fpt.add( createSimplePropertyType( name, VARCHAR, true ) ); 334 } 335 336 if ( parseGeometryProperty ) { 337 if ( wkt != -1 && wktGeom != null ) { 338 fps.add( createFeatureProperty( geomName, wktGeom ) ); 339 } else { 340 fps.add( createFeatureProperty( geomName, createPoint( x, y, null ) ) ); 341 } 342 fpt.add( createGeometryPropertyType( geomName, null, 1, 1 ) ); 343 } 344 345 FeatureType tp = createFeatureType( featureName, false, fpt.toArray( new PropertyType[fpt.size()] ) ); 346 fc.add( createFeature( ++counter + "", tp, fps ) ); 347 } while ( ( ( str = in.readLine() ) != null ) ); 348 349 in.close(); 350 351 // makes sense (?) 352 if ( fc.size() > 0 ) { 353 fc.setFeatureType( fc.getFeature( 0 ).getFeatureType() ); 354 } 355 356 return fc; 357 } 358 }