001 //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
002 /*---------------- FILE HEADER ------------------------------------------
003
004 This file is part of deegree.
005 Copyright (C) 2001-2008 by:
006 EXSE, Department of Geography, University of Bonn
007 http://www.giub.uni-bonn.de/deegree/
008 lat/lon GmbH
009 http://www.lat-lon.de
010
011 This library is free software; you can redistribute it and/or
012 modify it under the terms of the GNU Lesser General Public
013 License as published by the Free Software Foundation; either
014 version 2.1 of the License, or (at your option) any later version.
015
016 This library is distributed in the hope that it will be useful,
017 but WITHOUT ANY WARRANTY; without even the implied warranty of
018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 Lesser General Public License for more details.
020
021 You should have received a copy of the GNU Lesser General Public
022 License along with this library; if not, write to the Free Software
023 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024
025 Contact:
026
027 Andreas Poth
028 lat/lon GmbH
029 Aennchenstr. 19
030 53115 Bonn
031 Germany
032 E-Mail: poth@lat-lon.de
033
034 Prof. Dr. Klaus Greve
035 Department of Geography
036 University of Bonn
037 Meckenheimer Allee 166
038 53115 Bonn
039 Germany
040 E-Mail: greve@giub.uni-bonn.de
041
042
043 ---------------------------------------------------------------------------*/
044 package org.deegree.ogcwebservices.csw.manager;
045
046 import java.io.IOException;
047 import java.io.StringReader;
048 import java.net.MalformedURLException;
049 import java.net.URI;
050 import java.net.URISyntaxException;
051 import java.net.URL;
052 import java.sql.SQLException;
053 import java.util.Date;
054 import java.util.Iterator;
055 import java.util.List;
056 import java.util.Timer;
057 import java.util.TimerTask;
058 import java.util.Vector;
059
060 import org.apache.commons.httpclient.HttpClient;
061 import org.apache.commons.httpclient.HttpException;
062 import org.apache.commons.httpclient.methods.GetMethod;
063 import org.deegree.enterprise.WebUtils;
064 import org.deegree.framework.log.ILogger;
065 import org.deegree.framework.log.LoggerFactory;
066 import org.deegree.framework.mail.EMailMessage;
067 import org.deegree.framework.mail.MailHelper;
068 import org.deegree.framework.mail.MailMessage;
069 import org.deegree.framework.mail.SendMailException;
070 import org.deegree.framework.util.FileUtils;
071 import org.deegree.framework.util.StringTools;
072 import org.deegree.framework.xml.NamespaceContext;
073 import org.deegree.framework.xml.XMLFragment;
074 import org.deegree.framework.xml.XMLParsingException;
075 import org.deegree.io.DBPoolException;
076 import org.deegree.ogcbase.CommonNamespaces;
077 import org.deegree.ogcwebservices.OGCWebServiceException;
078 import org.deegree.ogcwebservices.csw.CSWFactory;
079 import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
080 import org.xml.sax.SAXException;
081
082 /**
083 * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester
084 * shall be implemented. A concrete implementation of AbstractHarvester will be called within a
085 * timer loop.
086 *
087 *
088 * @version $Revision: 9345 $
089 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
090 * @author last edited by: $Author: apoth $
091 *
092 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
093 *
094 * @since 2.0
095 */
096 public abstract class AbstractHarvester extends TimerTask {
097
private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );

// true while no timer is active: initially, and again after stopHarvesting()
private boolean stopped = true;

// timer that periodically invokes this task; assigned in startHarvesting()
private Timer timer = null;

// sources contained in this list are rejected by shallHarvest();
// presumably maintained by concrete harvesters while a source is being processed -- TODO confirm
protected List<URI> inProgress = new Vector<URI>();

// namespace context obtained from CommonNamespaces, extended below with the
// prefixes smXML, iso19119 and iso19115
protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
static {
    try {
        nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
        nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
        nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
    } catch ( URISyntaxException e ) {
        // not expected for the constant URIs above; report through the class logger
        // (declared before this initializer, so it is already available) instead of stderr
        LOG.logError( e.getMessage(), e );
    }
}
116
117 /**
118 * adds a request to the harvesting process
119 *
120 * @param request
121 * @throws SQLException
122 * @throws DBPoolException
123 */
124 public void addRequest( Harvest request )
125 throws IOException, DBPoolException, SQLException {
126 HarvestRepository.getInstance().storeRequest( request );
127 }
128
129 /**
130 * returns true if the harvesting process is running
131 *
132 * @return <code>true</code> if the harvesting process is running
133 */
134 public boolean isRunning() {
135 return !stopped;
136 }
137
138 /**
139 * removes a request from the harvesting request.
140 * <p>
141 * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic
142 * harvesting job, so this method can not be called with a standard OGC OWS request !!</b>
143 * </p>
144 *
145 * @param request
146 * @throws SQLException
147 * @throws DBPoolException
148 */
149 public void removeRequest( Harvest request )
150 throws IOException, DBPoolException, SQLException {
151 HarvestRepository.getInstance().dropRequest( request.getSource() );
152 }
153
154 /**
155 * starts the harvesting process
156 *
157 */
158 public void startHarvesting() {
159 timer = new Timer();
160 timer.schedule( this, 0, 10000 );
161 stopped = false;
162 LOG.logInfo( "harvesting has been started" );
163 }
164
165 /**
166 * stops the harvesting process
167 *
168 */
169 public void stopHarvesting() {
170 timer.purge();
171 timer.cancel();
172 stopped = true;
173 LOG.logInfo( "harvesting has been stopped" );
174 }
175
176 /**
177 * informs all response handlers assigend to a source about successful harvesting of the source
178 *
179 * @param source
180 * @throws URISyntaxException
181 * @throws SQLException
182 * @throws DBPoolException
183 * @throws MalformedURLException
184 */
185 protected void informResponseHandlers( URI source )
186 throws IOException, DBPoolException, SQLException, URISyntaxException {
187
188 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
189
190 for ( Iterator iter = list.iterator(); iter.hasNext(); ) {
191 HarvestRepository.ResponseHandler handler = (HarvestRepository.ResponseHandler) iter.next();
192 String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
193 sendMessage( handler, message );
194 }
195
196 }
197
198 /**
199 * returns true if the passed source shall be harvested. this is true if a source has not been
200 * harvested before or the next harvesting timestamp has been reached and the source is of type
201 *
202 * @see HarvestRepository.ResourceType service
203 *
204 * @param source
205 * @return <code>true</code> if the passed source shall be harvested
206 * @throws DBPoolException
207 * @throws SQLException
208 */
209 protected boolean shallHarvest( URI source, ResourceType targetType )
210 throws IOException, DBPoolException, SQLException {
211
212 if ( inProgress.contains( source ) ) {
213 return false;
214 }
215
216 HarvestRepository repository = HarvestRepository.getInstance();
217
218 ResourceType st = repository.getSourceType( source );
219
220 if ( !st.equals( targetType ) ) {
221 return false;
222 }
223
224 Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
225 Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
226
227 long tmp = System.currentTimeMillis() - nextHarvesting.getTime();
228 return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source );
229 }
230
231 /**
232 * informs all response handlers assigend to a source about an exception that occurs when
233 * harvesting a source
234 *
235 * @param source
236 * @param e
237 * @throws URISyntaxException
238 * @throws SQLException
239 * @throws DBPoolException
240 */
241 protected void informResponseHandlers( URI source, Throwable e )
242 throws IOException, DBPoolException, SQLException, URISyntaxException {
243
244 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
245
246 for ( Iterator iter = list.iterator(); iter.hasNext(); ) {
247 HarvestRepository.ResponseHandler handler = (HarvestRepository.ResponseHandler) iter.next();
248 String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
249 e.getMessage() );
250 sendMessage( handler, message );
251
252 }
253
254 }
255
256 /**
257 *
258 * @param handler
259 * @param message
260 * @throws SendMailException
261 * @throws MalformedURLException
262 * @throws IOException
263 * @throws HttpException
264 */
265 private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
266 if ( handler.isMailAddress() ) {
267 String s = handler.getUri().toASCIIString();
268 int p = s.indexOf( ":" );
269 s = s.substring( p + 1, s.length() );
270 LOG.logDebug( "sending message", message );
271 LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) );
272 MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
273 try {
274 MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
275 } catch ( Exception e ) {
276 LOG.logInfo( "could not send mail to admin:", e.getMessage() );
277 LOG.logError( e.getMessage(), e );
278 }
279 } else {
280 LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
281 HttpClient client = new HttpClient();
282 LOG.logDebug( message );
283
284 try {
285 client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() );
286 GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message );
287 client.executeMethod( get );
288 } catch ( Exception e ) {
289 LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
290 + e.getMessage() );
291 }
292
293 }
294 }
295
296 /**
297 * abstract super class for all harvest processors
298 *
299 *
300 * @version $Revision: 9345 $
301 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
302 * @author last edited by: $Author: apoth $
303 *
304 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
305 *
306 * @since 2.0
307 */
308 protected abstract class AbstractHarvestProcessor extends Thread {
309
310 protected URI source = null;
311
312 protected AbstractHarvester owner = null;
313
314 protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
315 this.owner = owner;
316 this.source = source;
317 }
318
319 /**
320 * performs a transaction for inserting or updating a service meta data record in the
321 * catalogue a harvester instance belongs too
322 *
323 * @param trans
324 * @throws SAXException
325 * @throws IOException
326 * @throws XMLParsingException
327 * @throws OGCWebServiceException
328 */
329 protected void performTransaction( String trans )
330 throws SAXException, IOException, OGCWebServiceException {
331
332 StringReader sr = new StringReader( trans );
333 XMLFragment xml = new XMLFragment();
334 xml.load( sr, XMLFragment.DEFAULT_URL );
335 Transaction transaction = Transaction.create( "id", xml.getRootElement() );
336 CSWFactory.getService().doService( transaction );
337
338 }
339
340 /**
341 * creates a CSW Transaction including an Update operation for the passed meta data.
342 *
343 * @param identifier
344 * @param xpath
345 * @param metaData
346 * @return update request
347 * @throws IOException
348 */
349 protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
350 throws IOException {
351
352 // read template from file
353 // TODO
354 // read different templates depending on metadata format
355 URL url = AbstractHarvester.class.getResource( "iso09_update_template.xml" );
356 String update = FileUtils.readTextFile( url ).toString();
357
358 // remove XML doctype section to enable inserting the metadata set as string
359 // into the template
360 String s = metaData.getAsString();
361 int p = s.lastIndexOf( "?>" );
362 if ( p > -1 ) {
363 s = s.substring( p + 2, s.length() );
364 }
365
366 // set metadata set to update by replacing the string '$metadata$'
367 // in the template
368 update = StringTools.replace( update, "$metadata$", s, false );
369 s = createConstraint( identifier, xpath ).toString();
370
371 // set filter/constraint to identify the metadata set to be updated
372 // by replacing the string '$constraints$' in the template
373 return StringTools.replace( update, "$constraints$", s, false );
374 }
375
376 /**
377 * creates a transaction request including a delete operation to remove the metadata record
378 * with the passed fileIdentifier from the catalogue
379 *
380 * @param identifier
381 * @param xpath
382 * @return delete request
383 * @throws IOException
384 */
385 protected String createDeleteRequest( String identifier, String xpath )
386 throws IOException {
387 // read template from file
388 // TODO
389 // read different templates depending on metadata format
390 URL url = AbstractHarvester.class.getResource( "iso09_delete_template.xml" );
391 String delete = FileUtils.readTextFile( url ).toString();
392
393 String s = createConstraint( identifier, xpath ).toString();
394
395 // set filter/constraint to identify the metadata set to be deleted
396 // by replacing the string '$constraints$' in the template
397 return StringTools.replace( delete, "$constraints$", s, false );
398 }
399
/**
 * Creates the constraint identifying the meta data set an update or delete operation shall
 * be applied to. The constraint depends on the concrete metadata format, so it must be
 * provided by the implementing class.
 *
 * @param fileIdentifier
 *            value to be compared
 * @param xpath
 *            comparable property
 * @return constraint that replaces the placeholder <code>$constraints$</code> of the update
 *         and delete request templates
 * @throws IOException
 *             declared for implementations that perform I/O while creating the constraint
 */
protected abstract String createConstraint( String fileIdentifier, String xpath )
                        throws IOException;
412
413 /**
414 * creates a CSW Transaction including an Update operation for the passed meta data
415 *
416 * @param metaData
417 * @return
418 * @throws IOException
419 */
420 protected String createInsertRequest( XMLFragment metaData )
421 throws IOException {
422 // read template from file
423 // TODO
424 // read different templates depending on metadata format
425 URL url = AbstractHarvester.class.getResource( "iso09_insert_template.xml" );
426 String insert = FileUtils.readTextFile( url ).toString();
427
428 String s = metaData.getAsString();
429 int p = s.lastIndexOf( "?>" );
430 if ( p > -1 ) {
431 s = s.substring( p + 2, s.length() );
432 }
433
434 // set metadata set to insert by replacing the string '$metadata$'
435 // in the template
436 return StringTools.replace( insert, "$metadata$", s, false );
437
438 }
439
440 /**
441 * actualizes the source in the repository with timestamp of last harvesting
442 *
443 * @param source
444 * @param date
445 * @throws SQLException
446 * @throws DBPoolException
447 */
448 protected void writeLastHarvestingTimestamp( URI source, Date date )
449 throws IOException, DBPoolException, SQLException {
450 HarvestRepository repository = HarvestRepository.getInstance();
451 repository.setLastHarvestingTimestamp( source, date );
452 }
453
454 /**
455 * actualizes the source in the repository with timestamp when next harvesting shall be
456 * performed
457 *
458 * @param source
459 * @param date
460 * @throws SQLException
461 * @throws DBPoolException
462 */
463 protected void writeNextHarvestingTimestamp( URI source, Date date )
464 throws IOException, DBPoolException, SQLException {
465 HarvestRepository repository = HarvestRepository.getInstance();
466 long ts = repository.getHarvestInterval( source );
467 date = new Date( ts + date.getTime() );
468 repository.setNextHarvestingTimestamp( source, date );
469 }
470
471 }
472
473 }