001 //$HeadURL: https://svn.wald.intevation.org/svn/deegree/base/branches/2.3_testing/src/org/deegree/io/csv/CSVReader.java $
002 /*----------------------------------------------------------------------------
003 This file is part of deegree, http://deegree.org/
004 Copyright (C) 2001-2009 by:
005 Department of Geography, University of Bonn
006 and
007 lat/lon GmbH
008
009 This library is free software; you can redistribute it and/or modify it under
010 the terms of the GNU Lesser General Public License as published by the Free
011 Software Foundation; either version 2.1 of the License, or (at your option)
012 any later version.
013 This library is distributed in the hope that it will be useful, but WITHOUT
014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
015 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
016 details.
017 You should have received a copy of the GNU Lesser General Public License
018 along with this library; if not, write to the Free Software Foundation, Inc.,
019 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020
021 Contact information:
022
023 lat/lon GmbH
024 Aennchenstr. 19, 53177 Bonn
025 Germany
026 http://lat-lon.de/
027
028 Department of Geography, University of Bonn
029 Prof. Dr. Klaus Greve
030 Postfach 1147, 53001 Bonn
031 Germany
032 http://www.geographie.uni-bonn.de/deegree/
033
034 e-mail: info@deegree.org
035 ----------------------------------------------------------------------------*/
036
037 package org.deegree.io.csv;
038
039 import static java.io.StreamTokenizer.TT_EOF;
040 import static java.lang.Double.parseDouble;
041 import static java.util.Collections.unmodifiableList;
042 import static org.deegree.datatypes.Types.VARCHAR;
043 import static org.deegree.framework.log.LoggerFactory.getLogger;
044 import static org.deegree.io.mapinfoapi.MapInfoReader.whitespaceChars;
045 import static org.deegree.io.mapinfoapi.MapInfoReader.wordChars;
046 import static org.deegree.model.feature.FeatureFactory.createFeature;
047 import static org.deegree.model.feature.FeatureFactory.createFeatureCollection;
048 import static org.deegree.model.feature.FeatureFactory.createFeatureProperty;
049 import static org.deegree.model.feature.FeatureFactory.createFeatureType;
050 import static org.deegree.model.feature.FeatureFactory.createGeometryPropertyType;
051 import static org.deegree.model.feature.FeatureFactory.createSimplePropertyType;
052 import static org.deegree.model.spatialschema.GeometryFactory.createPoint;
053 import static org.deegree.model.spatialschema.WKTAdapter.wrap;
054
055 import java.io.BufferedReader;
056 import java.io.File;
057 import java.io.FileReader;
058 import java.io.IOException;
059 import java.io.StreamTokenizer;
060 import java.io.StringReader;
061 import java.net.URI;
062 import java.net.URISyntaxException;
063 import java.util.ArrayList;
064 import java.util.LinkedList;
065 import java.util.List;
066
067 import org.deegree.datatypes.QualifiedName;
068 import org.deegree.framework.log.ILogger;
069 import org.deegree.model.feature.FeatureCollection;
070 import org.deegree.model.feature.FeatureProperty;
071 import org.deegree.model.feature.schema.FeatureType;
072 import org.deegree.model.feature.schema.PropertyType;
073 import org.deegree.model.spatialschema.Geometry;
074 import org.deegree.model.spatialschema.GeometryException;
075
076 /**
077 * <code>CSVReader</code>
078 *
079 * @author <a href="mailto:schmitz@lat-lon.de">Andreas Schmitz</a>
080 * @author last edited by: $Author: mschneider $
081 *
082 * @version $Revision: 18195 $, $Date: 2009-06-18 17:55:39 +0200 (Do, 18. Jun 2009) $
083 */
084 public class CSVReader {
085
086 private static final ILogger LOG = getLogger( CSVReader.class );
087
088 private File fileName;
089
090 private int xcol = 0, ycol = 1, wkt = -1;
091
092 private static URI APPNS;
093
094 private List<String[]> header;
095
096 private boolean ignoreFirstLine, parseGeometryProperty = true;
097
098 static {
099 try {
100 APPNS = new URI( "http://www.deegree.org/app" );
101 } catch ( URISyntaxException e ) {
102 // yes, cannot happen
103 }
104 }
105
106 /**
107 * @param name
108 * @param ignoreFirstLine
109 * @throws IOException
110 */
111 public CSVReader( String name, boolean ignoreFirstLine ) throws IOException {
112 this.ignoreFirstLine = ignoreFirstLine;
113
114 fileName = new File( name ).getAbsoluteFile();
115
116 header = new ArrayList<String[]>( 3 );
117
118 BufferedReader in = new BufferedReader( new FileReader( name ) );
119 String str = in.readLine();
120 char separat = determineSeparator( str );
121 do {
122 List<String> lst = parseLine( str, separat );
123 header.add( lst.toArray( new String[lst.size()] ) );
124 } while ( ( ( str = in.readLine() ) != null ) && header.size() < 3 );
125 in.close();
126 }
127
128 /**
129 * @return max. the first three lines of the file (if there are three)
130 */
131 public List<String[]> getHeader() {
132 return unmodifiableList( header );
133 }
134
135 /**
136 * By default, a geometry property will be parsed. Set this to false to get "simple property only" features.
137 *
138 * @param parseGeometryProperty
139 */
140 public void setParseGeometryProperty( boolean parseGeometryProperty ) {
141 this.parseGeometryProperty = parseGeometryProperty;
142 }
143
144 private static char determineSeparator( String s ) {
145 // determine most likely separator
146 int ccount = countChars( s, ',' );
147 int scount = countChars( s, ';' );
148 int tcount = countChars( s, '\t' );
149 if ( ccount >= scount && ccount >= tcount ) {
150 return ',';
151 }
152 if ( tcount >= ccount && tcount >= scount ) {
153 return '\t';
154 }
155 if ( scount >= ccount && scount >= tcount ) {
156 return ';';
157 }
158 return ',';
159 }
160
161 private static List<String> parseLine( String line, char separator )
162 throws IOException {
163 String seps = ",;\t";
164 for ( int i = 0; i < seps.length(); ++i ) {
165 if ( line.startsWith( "" + seps.charAt( i ) ) ) {
166 line = "\"\"" + line;
167 }
168 String dseps = "" + seps.charAt( i ) + seps.charAt( i );
169 while ( line.indexOf( dseps ) != -1 ) {
170 line = line.replace( dseps, seps.charAt( i ) + "\"\"" + seps.charAt( i ) );
171 }
172 }
173 StreamTokenizer tok = getCSVFromStringTokenizer( line, separator );
174
175 LinkedList<String> list = new LinkedList<String>();
176
177 tok.nextToken();
178 if ( tok.ttype == TT_EOF ) {
179 return list;
180 }
181 while ( tok.ttype != TT_EOF ) {
182 list.add( tok.sval );
183 tok.nextToken();
184 }
185
186 return list;
187 }
188
189 /**
190 * Also sets wkt to -1.
191 *
192 * @param x
193 * @param y
194 */
195 public void setPointColumns( int x, int y ) {
196 xcol = x;
197 ycol = y;
198 wkt = -1;
199 }
200
201 /**
202 * @param wkt
203 * if -1, x/y will be used instead
204 */
205 public void setWKTColumn( int wkt ) {
206 this.wkt = wkt;
207 }
208
209 /**
210 * @param input
211 * @param separator
212 * @return a tokenizer with a stringreader as data input
213 */
214 public static StreamTokenizer getCSVFromStringTokenizer( String input, char separator ) {
215 StreamTokenizer tok = new StreamTokenizer( new StringReader( input ) );
216
217 tok.resetSyntax();
218 tok.eolIsSignificant( true );
219 tok.lowerCaseMode( true );
220 tok.slashSlashComments( false );
221 tok.slashStarComments( false );
222 tok.wordChars( 'a', 'z' );
223 tok.wordChars( 'A', 'Z' );
224 tok.wordChars( '\u00a0', '\u00ff' );
225 tok.wordChars( '0', '9' );
226 wordChars( tok, ',', '\t', ';' );
227 wordChars( tok, '.', '-', '_', ' ', '+', '/', '\\', '(', ')', '^' );
228 tok.quoteChar( '"' );
229 whitespaceChars( tok, '\n', '\r', '\f' );
230
231 // reset separator
232 whitespaceChars( tok, separator );
233
234 return tok;
235 }
236
237 private static int countChars( String s, char c ) {
238 int count = 0;
239 for ( int i = 0; i < s.length(); ++i ) {
240 if ( s.charAt( i ) == c ) {
241 ++count;
242 }
243 }
244 return count;
245 }
246
247 /**
248 * @return a new feature collection
249 * @throws IOException
250 */
251 public FeatureCollection parseFeatureCollection()
252 throws IOException {
253 FeatureCollection fc = createFeatureCollection( "uniquemy_", 512 );
254 QualifiedName geomName = new QualifiedName( "app:geometry", APPNS );
255 QualifiedName featureName = new QualifiedName( "app:feature", APPNS );
256
257 int counter = 0;
258
259 BufferedReader in = new BufferedReader( new FileReader( fileName ) );
260 String str = in.readLine();
261 List<String> colNames = null;
262
263 char separator = determineSeparator( str );
264 if ( ignoreFirstLine ) {
265 colNames = parseLine( str, separator );
266 str = in.readLine();
267 }
268 outer: do {
269 LOG.logDebug( "Trying to parse line ", str );
270 List<String> vals = parseLine( str, separator );
271
272 double x = 0, y = 0;
273 Geometry wktGeom = null;
274 LinkedList<FeatureProperty> fps = new LinkedList<FeatureProperty>();
275 LinkedList<PropertyType> fpt = new LinkedList<PropertyType>();
276
277 for ( int i = 0; i < vals.size(); ++i ) {
278
279 if ( parseGeometryProperty && wkt == -1 && i == xcol ) {
280 try {
281 x = parseDouble( vals.get( i ) );
282 } catch ( NumberFormatException nfe ) {
283 // puh, CSV is an easy format? I think not...
284 try {
285 x = parseDouble( vals.get( i ).replace( ",", "." ) );
286 } catch ( NumberFormatException nfe2 ) {
287 LOG.logWarning( "Skipping line " + str );
288 continue outer;
289 }
290 }
291 continue;
292 }
293 if ( parseGeometryProperty && wkt == -1 && i == ycol ) {
294 if ( vals.get( i ).equals( "" ) ) {
295 y = 0; // this seems to be a sensible (Java-like) default
296 } else {
297 try {
298 y = parseDouble( vals.get( i ) );
299 } catch ( NumberFormatException nfe ) {
300 // puh, CSV is an easy format? I think not...
301 try {
302 y = parseDouble( vals.get( i ).replace( ",", "." ) );
303 } catch ( NumberFormatException nfe2 ) {
304 LOG.logWarning( "Skipping line " + str );
305 continue outer;
306 }
307 }
308 }
309 continue;
310 }
311 if ( parseGeometryProperty && wkt != -1 && i == wkt ) {
312 try {
313 wktGeom = wrap( vals.get( i ), null );
314 } catch ( GeometryException e ) {
315 LOG.logError( "Invalid WKT geometry", e );
316 }
317 if ( wktGeom == null ) {
318 LOG.logError( "Could not parse WKT geometry: " + vals.get( i ) );
319 }
320 continue;
321 }
322
323 String n;
324 if ( ignoreFirstLine ) {
325 String coln = colNames.get( i );
326 n = "app:" + ( coln.trim().equals( "" ) ? "property" + i : coln );
327 } else {
328 n = "app:property" + i;
329 }
330 n = n.replace( ' ', '_' );
331 QualifiedName name = new QualifiedName( n, APPNS );
332 fps.add( createFeatureProperty( name, vals.get( i ) ) );
333 fpt.add( createSimplePropertyType( name, VARCHAR, true ) );
334 }
335
336 if ( parseGeometryProperty ) {
337 if ( wkt != -1 && wktGeom != null ) {
338 fps.add( createFeatureProperty( geomName, wktGeom ) );
339 } else {
340 fps.add( createFeatureProperty( geomName, createPoint( x, y, null ) ) );
341 }
342 fpt.add( createGeometryPropertyType( geomName, null, 1, 1 ) );
343 }
344
345 FeatureType tp = createFeatureType( featureName, false, fpt.toArray( new PropertyType[fpt.size()] ) );
346 fc.add( createFeature( ++counter + "", tp, fps ) );
347 } while ( ( ( str = in.readLine() ) != null ) );
348
349 in.close();
350
351 // makes sense (?)
352 if ( fc.size() > 0 ) {
353 fc.setFeatureType( fc.getFeature( 0 ).getFeatureType() );
354 }
355
356 return fc;
357 }
358 }