001 //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $ 002 /*---------------- FILE HEADER ------------------------------------------ 003 004 This file is part of deegree. 005 Copyright (C) 2001-2008 by: 006 EXSE, Department of Geography, University of Bonn 007 http://www.giub.uni-bonn.de/deegree/ 008 lat/lon GmbH 009 http://www.lat-lon.de 010 011 This library is free software; you can redistribute it and/or 012 modify it under the terms of the GNU Lesser General Public 013 License as published by the Free Software Foundation; either 014 version 2.1 of the License, or (at your option) any later version. 015 016 This library is distributed in the hope that it will be useful, 017 but WITHOUT ANY WARRANTY; without even the implied warranty of 018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 Lesser General Public License for more details. 020 021 You should have received a copy of the GNU Lesser General Public 022 License along with this library; if not, write to the Free Software 023 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 024 025 Contact: 026 027 Andreas Poth 028 lat/lon GmbH 029 Aennchenstr. 19 030 53115 Bonn 031 Germany 032 E-Mail: poth@lat-lon.de 033 034 Prof. Dr. Klaus Greve 035 Department of Geography 036 University of Bonn 037 Meckenheimer Allee 166 038 53115 Bonn 039 Germany 040 E-Mail: greve@giub.uni-bonn.de 041 042 043 ---------------------------------------------------------------------------*/ 044 package org.deegree.ogcwebservices.csw.manager; 045 046 import java.io.IOException; 047 import java.io.StringReader; 048 import java.net.MalformedURLException; 049 import java.net.URI; 050 import java.net.URISyntaxException; 051 import java.net.URL; 052 import java.sql.SQLException; 053 import java.util.Date; 054 import java.util.Iterator; 055 import java.util.List; 056 import java.util.Timer; 057 import java.util.TimerTask; 058 import java.util.Vector; 059 060 import org.apache.commons.httpclient.HttpClient; 061 import org.apache.commons.httpclient.HttpException; 062 import org.apache.commons.httpclient.methods.GetMethod; 063 import org.deegree.enterprise.WebUtils; 064 import org.deegree.framework.log.ILogger; 065 import org.deegree.framework.log.LoggerFactory; 066 import org.deegree.framework.mail.EMailMessage; 067 import org.deegree.framework.mail.MailHelper; 068 import org.deegree.framework.mail.MailMessage; 069 import org.deegree.framework.mail.SendMailException; 070 import org.deegree.framework.util.FileUtils; 071 import org.deegree.framework.util.StringTools; 072 import org.deegree.framework.xml.NamespaceContext; 073 import org.deegree.framework.xml.XMLFragment; 074 import org.deegree.framework.xml.XMLParsingException; 075 import org.deegree.io.DBPoolException; 076 import org.deegree.ogcbase.CommonNamespaces; 077 import org.deegree.ogcwebservices.OGCWebServiceException; 078 import org.deegree.ogcwebservices.csw.CSWFactory; 079 import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType; 080 import org.xml.sax.SAXException; 081 082 /** 083 * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester 084 * shall be implemented. A concrete implementation of AbstractHarvester will be called within a 085 * timer loop. 086 * 087 * 088 * @version $Revision: 9345 $ 089 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a> 090 * @author last edited by: $Author: apoth $ 091 * 092 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $ 093 * 094 * @since 2.0 095 */ 096 public abstract class AbstractHarvester extends TimerTask { 097 098 private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class ); 099 100 private boolean stopped = true; 101 102 private Timer timer = null; 103 104 protected List<URI> inProgress = new Vector<URI>(); 105 106 protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext(); 107 static { 108 try { 109 nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) ); 110 nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) ); 111 nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) ); 112 } catch ( URISyntaxException e ) { 113 e.printStackTrace(); 114 } 115 } 116 117 /** 118 * adds a request to the harvesting process 119 * 120 * @param request 121 * @throws SQLException 122 * @throws DBPoolException 123 */ 124 public void addRequest( Harvest request ) 125 throws IOException, DBPoolException, SQLException { 126 HarvestRepository.getInstance().storeRequest( request ); 127 } 128 129 /** 130 * returns true if the harvesting process is running 131 * 132 * @return <code>true</code> if the harvesting process is running 133 */ 134 public boolean isRunning() { 135 return !stopped; 136 } 137 138 /** 139 * removes a request from the harvesting request. 140 * <p> 141 * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic 142 * harvesting job, so this method can not be called with a standard OGC OWS request !!</b> 143 * </p> 144 * 145 * @param request 146 * @throws SQLException 147 * @throws DBPoolException 148 */ 149 public void removeRequest( Harvest request ) 150 throws IOException, DBPoolException, SQLException { 151 HarvestRepository.getInstance().dropRequest( request.getSource() ); 152 } 153 154 /** 155 * starts the harvesting process 156 * 157 */ 158 public void startHarvesting() { 159 timer = new Timer(); 160 timer.schedule( this, 0, 10000 ); 161 stopped = false; 162 LOG.logInfo( "harvesting has been started" ); 163 } 164 165 /** 166 * stops the harvesting process 167 * 168 */ 169 public void stopHarvesting() { 170 timer.purge(); 171 timer.cancel(); 172 stopped = true; 173 LOG.logInfo( "harvesting has been stopped" ); 174 } 175 176 /** 177 * informs all response handlers assigend to a source about successful harvesting of the source 178 * 179 * @param source 180 * @throws URISyntaxException 181 * @throws SQLException 182 * @throws DBPoolException 183 * @throws MalformedURLException 184 */ 185 protected void informResponseHandlers( URI source ) 186 throws IOException, DBPoolException, SQLException, URISyntaxException { 187 188 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source ); 189 190 for ( Iterator iter = list.iterator(); iter.hasNext(); ) { 191 HarvestRepository.ResponseHandler handler = (HarvestRepository.ResponseHandler) iter.next(); 192 String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" ); 193 sendMessage( handler, message ); 194 } 195 196 } 197 198 /** 199 * returns true if the passed source shall be harvested. this is true if a source has not been 200 * harvested before or the next harvesting timestamp has been reached and the source is of type 201 * 202 * @see HarvestRepository.ResourceType service 203 * 204 * @param source 205 * @return <code>true</code> if the passed source shall be harvested 206 * @throws DBPoolException 207 * @throws SQLException 208 */ 209 protected boolean shallHarvest( URI source, ResourceType targetType ) 210 throws IOException, DBPoolException, SQLException { 211 212 if ( inProgress.contains( source ) ) { 213 return false; 214 } 215 216 HarvestRepository repository = HarvestRepository.getInstance(); 217 218 ResourceType st = repository.getSourceType( source ); 219 220 if ( !st.equals( targetType ) ) { 221 return false; 222 } 223 224 Date lastHarvesting = repository.getLastHarvestingTimestamp( source ); 225 Date nextHarvesting = repository.getNextHarvestingTimestamp( source ); 226 227 long tmp = System.currentTimeMillis() - nextHarvesting.getTime(); 228 return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source ); 229 } 230 231 /** 232 * informs all response handlers assigend to a source about an exception that occurs when 233 * harvesting a source 234 * 235 * @param source 236 * @param e 237 * @throws URISyntaxException 238 * @throws SQLException 239 * @throws DBPoolException 240 */ 241 protected void informResponseHandlers( URI source, Throwable e ) 242 throws IOException, DBPoolException, SQLException, URISyntaxException { 243 244 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source ); 245 246 for ( Iterator iter = list.iterator(); iter.hasNext(); ) { 247 HarvestRepository.ResponseHandler handler = (HarvestRepository.ResponseHandler) iter.next(); 248 String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ", 249 e.getMessage() ); 250 sendMessage( handler, message ); 251 252 } 253 254 } 255 256 /** 257 * 258 * @param handler 259 * @param message 260 * @throws SendMailException 261 * @throws MalformedURLException 262 * @throws IOException 263 * @throws HttpException 264 */ 265 private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) { 266 if ( handler.isMailAddress() ) { 267 String s = handler.getUri().toASCIIString(); 268 int p = s.indexOf( ":" ); 269 s = s.substring( p + 1, s.length() ); 270 LOG.logDebug( "sending message", message ); 271 LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) ); 272 MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message ); 273 try { 274 MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) ); 275 } catch ( Exception e ) { 276 LOG.logInfo( "could not send mail to admin:", e.getMessage() ); 277 LOG.logError( e.getMessage(), e ); 278 } 279 } else { 280 LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) ); 281 HttpClient client = new HttpClient(); 282 LOG.logDebug( message ); 283 284 try { 285 client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() ); 286 GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message ); 287 client.executeMethod( get ); 288 } catch ( Exception e ) { 289 LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: " 290 + e.getMessage() ); 291 } 292 293 } 294 } 295 296 /** 297 * abstract super class for all harvest processores 298 * 299 * 300 * @version $Revision: 9345 $ 301 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a> 302 * @author last edited by: $Author: apoth $ 303 * 304 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $ 305 * 306 * @since 2.0 307 */ 308 protected abstract class AbstractHarvestProcessor extends Thread { 309 310 protected URI source = null; 311 312 protected AbstractHarvester owner = null; 313 314 protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) { 315 this.owner = owner; 316 this.source = source; 317 } 318 319 /** 320 * performs a transaction for inserting or updating a service meta data record in the 321 * catalogue a harvester instance belongs too 322 * 323 * @param trans 324 * @throws SAXException 325 * @throws IOException 326 * @throws XMLParsingException 327 * @throws OGCWebServiceException 328 */ 329 protected void performTransaction( String trans ) 330 throws SAXException, IOException, OGCWebServiceException { 331 332 StringReader sr = new StringReader( trans ); 333 XMLFragment xml = new XMLFragment(); 334 xml.load( sr, XMLFragment.DEFAULT_URL ); 335 Transaction transaction = Transaction.create( "id", xml.getRootElement() ); 336 CSWFactory.getService().doService( transaction ); 337 338 } 339 340 /** 341 * creates a CSW Transaction including an Update operation for the passed meta data. 342 * 343 * @param identifier 344 * @param xpath 345 * @param metaData 346 * @return update request 347 * @throws IOException 348 */ 349 protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData ) 350 throws IOException { 351 352 // read template from file 353 // TODO 354 // read different templates depending on metadata format 355 URL url = AbstractHarvester.class.getResource( "iso09_update_template.xml" ); 356 String update = FileUtils.readTextFile( url ).toString(); 357 358 // remove XML doctype section to enable inserting the metadata set as string 359 // into the template 360 String s = metaData.getAsString(); 361 int p = s.lastIndexOf( "?>" ); 362 if ( p > -1 ) { 363 s = s.substring( p + 2, s.length() ); 364 } 365 366 // set metadata set to update by replacing the string '$metadata$' 367 // in the template 368 update = StringTools.replace( update, "$metadata$", s, false ); 369 s = createConstraint( identifier, xpath ).toString(); 370 371 // set filter/constraint to identify the metadata set to be updated 372 // by replacing the string '$constraints$' in the template 373 return StringTools.replace( update, "$constraints$", s, false ); 374 } 375 376 /** 377 * creates a transaction request including a delete operation to remove the metadata record 378 * with the passed fileIdentifier from the catalogue 379 * 380 * @param identifier 381 * @param xpath 382 * @return delete request 383 * @throws IOException 384 */ 385 protected String createDeleteRequest( String identifier, String xpath ) 386 throws IOException { 387 // read template from file 388 // TODO 389 // read different templates depending on metadata format 390 URL url = AbstractHarvester.class.getResource( "iso09_delete_template.xml" ); 391 String delete = FileUtils.readTextFile( url ).toString(); 392 393 String s = createConstraint( identifier, xpath ).toString(); 394 395 // set filter/constraint to identify the metadata set to be deleted 396 // by replacing the string '$constraints$' in the template 397 return StringTools.replace( delete, "$constraints$", s, false ); 398 } 399 400 /** 401 * a constraint for delete und update operation depends on concrete metadata format. An 402 * implementing class must consider this. 403 * 404 * @param fileIdentifier 405 * value to be compared 406 * @param xpath 407 * comparable property 408 * @return 409 */ 410 protected abstract String createConstraint( String fileIdentifier, String xpath ) 411 throws IOException; 412 413 /** 414 * creates a CSW Transaction including an Update operation for the passed meta data 415 * 416 * @param metaData 417 * @return 418 * @throws IOException 419 */ 420 protected String createInsertRequest( XMLFragment metaData ) 421 throws IOException { 422 // read template from file 423 // TODO 424 // read different templates depending on metadata format 425 URL url = AbstractHarvester.class.getResource( "iso09_insert_template.xml" ); 426 String insert = FileUtils.readTextFile( url ).toString(); 427 428 String s = metaData.getAsString(); 429 int p = s.lastIndexOf( "?>" ); 430 if ( p > -1 ) { 431 s = s.substring( p + 2, s.length() ); 432 } 433 434 // set metadata set to insert by replacing the string '$metadata$' 435 // in the template 436 return StringTools.replace( insert, "$metadata$", s, false ); 437 438 } 439 440 /** 441 * actualizes the source in the repository with timestamp of last harvesting 442 * 443 * @param source 444 * @param date 445 * @throws SQLException 446 * @throws DBPoolException 447 */ 448 protected void writeLastHarvestingTimestamp( URI source, Date date ) 449 throws IOException, DBPoolException, SQLException { 450 HarvestRepository repository = HarvestRepository.getInstance(); 451 repository.setLastHarvestingTimestamp( source, date ); 452 } 453 454 /** 455 * actualizes the source in the repository with timestamp when next harvesting shall be 456 * performed 457 * 458 * @param source 459 * @param date 460 * @throws SQLException 461 * @throws DBPoolException 462 */ 463 protected void writeNextHarvestingTimestamp( URI source, Date date ) 464 throws IOException, DBPoolException, SQLException { 465 HarvestRepository repository = HarvestRepository.getInstance(); 466 long ts = repository.getHarvestInterval( source ); 467 date = new Date( ts + date.getTime() ); 468 repository.setNextHarvestingTimestamp( source, date ); 469 } 470 471 } 472 473 }