//$HeadURL: svn+ssh://jwilden@svn.wald.intevation.org/deegree/base/branches/2.5_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
/*----------------------------------------------------------------------------
 This file is part of deegree, http://deegree.org/
 Copyright (C) 2001-2009 by:
   Department of Geography, University of Bonn
 and
   lat/lon GmbH

 This library is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License as published by the Free
 Software Foundation; either version 2.1 of the License, or (at your option)
 any later version.
 This library is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 details.
 You should have received a copy of the GNU Lesser General Public License
 along with this library; if not, write to the Free Software Foundation, Inc.,
 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

 Contact information:

 lat/lon GmbH
 Aennchenstr. 19, 53177 Bonn
 Germany
 http://lat-lon.de/

 Department of Geography, University of Bonn
 Prof. Dr. Klaus Greve
 Postfach 1147, 53001 Bonn
 Germany
 http://www.geographie.uni-bonn.de/deegree/

 e-mail: info@deegree.org
 ----------------------------------------------------------------------------*/
package org.deegree.ogcwebservices.csw.manager;

import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.deegree.enterprise.WebUtils;
import org.deegree.framework.log.ILogger;
import org.deegree.framework.log.LoggerFactory;
import org.deegree.framework.mail.EMailMessage;
import org.deegree.framework.mail.MailHelper;
import org.deegree.framework.mail.MailMessage;
import org.deegree.framework.util.FileUtils;
import org.deegree.framework.util.StringTools;
import org.deegree.framework.xml.NamespaceContext;
import org.deegree.framework.xml.XMLFragment;
import org.deegree.io.DBPoolException;
import org.deegree.ogcbase.CommonNamespaces;
import org.deegree.ogcwebservices.OGCWebServiceException;
import org.deegree.ogcwebservices.csw.CSWFactory;
import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
import org.xml.sax.SAXException;

/**
 * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester shall be implemented. A
 * concrete implementation of AbstractHarvester will be called within a timer loop.
 * <p>
 * The class is a {@link TimerTask}; {@link #startHarvesting()} schedules the instance itself on a private
 * {@link Timer}. Subclasses provide the inherited <code>run()</code> method.
 * </p>
 *
 * @version $Revision: 21672 $
 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
 * @author last edited by: $Author: apoth $
 *
 * @version 1.0. $Revision: 21672 $, $Date: 2009-12-29 09:44:20 +0100 (Di, 29 Dez 2009) $
 *
 * @since 2.0
 */
public abstract class AbstractHarvester extends TimerTask {

    private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );

    /** period in milliseconds between two invocations of this task by the timer */
    private static final long TIMER_PERIOD = 10000;

    /** default XPath to a record identifier, used when no metadata document is available (ISO 19115) */
    private static final String DEFAULT_IDENTIFIER_XPATH = "iso19115:fileIdentifier/smXML:CharacterString";

    private boolean stopped = true;

    private Timer timer = null;

    /** sources that are currently being processed; such sources are rejected by {@link #shallHarvest} */
    protected List<URI> inProgress = new Vector<URI>();

    /** the version of the CSW, used as suffix for template and message keys */
    protected String version;

    protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
    static {
        try {
            nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
            nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
            nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
        } catch ( URISyntaxException e ) {
            // the URIs are constants, so this is not expected; report through the class logger instead of
            // writing to stderr
            LOG.logError( e.getMessage(), e );
        }
    }

    /**
     * @param version
     *            the version of the CSW
     */
    protected AbstractHarvester( String version ) {
        this.version = version;
    }

    /**
     * adds a request to the harvesting process by storing it in the {@link HarvestRepository}
     *
     * @param request
     *            the harvest request to store
     * @throws IOException
     * @throws SQLException
     * @throws DBPoolException
     */
    public void addRequest( Harvest request )
                            throws IOException, DBPoolException, SQLException {
        HarvestRepository.getInstance().storeRequest( request );
    }

    /**
     * returns true if the harvesting process is running
     *
     * @return <code>true</code> if the harvesting process is running
     */
    public boolean isRunning() {
        return !stopped;
    }

    /**
     * removes a request from the harvesting process by dropping its source from the {@link HarvestRepository}.
     * <p>
     * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic harvesting job, so this
     * method can not be called with a standard OGC OWS request !!</b>
     * </p>
     *
     * @param request
     *            the harvest request whose source shall be dropped
     * @throws IOException
     * @throws SQLException
     * @throws DBPoolException
     */
    public void removeRequest( Harvest request )
                            throws IOException, DBPoolException, SQLException {
        HarvestRepository.getInstance().dropRequest( request.getSource() );
    }

    /**
     * starts the harvesting process by scheduling this task on a new timer. Calling the method while harvesting is
     * already running has no effect.
     *
     * @throws IllegalStateException
     *             if the timer rejects this task; <code>java.util.Timer</code> does so for a task that was already
     *             scheduled or cancelled
     */
    public void startHarvesting() {
        if ( !stopped ) {
            // a second timer would make the running one unreachable for stopHarvesting()
            LOG.logInfo( "harvesting is already running" );
            return;
        }
        Timer newTimer = new Timer();
        try {
            newTimer.schedule( this, 0, TIMER_PERIOD );
        } catch ( IllegalStateException e ) {
            // do not leave the freshly created timer thread behind
            newTimer.cancel();
            throw e;
        }
        timer = newTimer;
        stopped = false;
        LOG.logInfo( "harvesting has been started" );
    }

    /**
     * stops the harvesting process. The method may be called even if harvesting has never been started.
     */
    public void stopHarvesting() {
        if ( timer != null ) {
            // cancelling the timer discards all of its scheduled tasks
            timer.cancel();
            timer = null;
        }
        stopped = true;
        LOG.logInfo( "harvesting has been stopped" );
    }

    /**
     * informs all response handlers assigned to a source about successful harvesting of the source
     *
     * @param source
     *            the harvested source
     * @throws IOException
     * @throws URISyntaxException
     * @throws SQLException
     * @throws DBPoolException
     */
    protected void informResponseHandlers( URI source )
                            throws IOException, DBPoolException, SQLException, URISyntaxException {

        List<HarvestRepository.ResponseHandler> handlers = HarvestRepository.getInstance().getResponseHandlers( source );

        for ( HarvestRepository.ResponseHandler handler : handlers ) {
            String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
            sendMessage( handler, message );
        }

    }

    /**
     * returns true if the passed source shall be harvested. This is the case if the source is not currently in
     * progress, is of the passed type and one of the following holds: it has not been harvested before, its next
     * harvesting timestamp is unknown or has been reached, or the repository demands a forced harvesting.
     *
     * @see HarvestRepository.ResourceType
     *
     * @param source
     *            the source to check
     * @param targetType
     *            the type of source the calling harvester is responsible for
     * @return <code>true</code> if the passed source shall be harvested
     * @throws IOException
     * @throws DBPoolException
     * @throws SQLException
     */
    protected boolean shallHarvest( URI source, ResourceType targetType )
                            throws IOException, DBPoolException, SQLException {

        if ( inProgress.contains( source ) ) {
            return false;
        }

        HarvestRepository repository = HarvestRepository.getInstance();

        ResourceType sourceType = repository.getSourceType( source );
        // a source without a known type can not match the requested type
        if ( sourceType == null || !sourceType.equals( targetType ) ) {
            return false;
        }

        Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
        Date nextHarvesting = repository.getNextHarvestingTimestamp( source );

        // the null checks must precede the comparison; otherwise a source without timestamps raises a
        // NullPointerException instead of being harvested
        if ( lastHarvesting == null || nextHarvesting == null ) {
            return true;
        }
        if ( System.currentTimeMillis() >= nextHarvesting.getTime() ) {
            return true;
        }
        return repository.shallForceHarvesting( source );
    }

    /**
     * informs all response handlers assigned to a source about an exception that occurred when harvesting a source
     *
     * @param source
     *            the source that failed to be harvested
     * @param e
     *            the failure; its message is forwarded to the handlers
     * @throws IOException
     * @throws URISyntaxException
     * @throws SQLException
     * @throws DBPoolException
     */
    protected void informResponseHandlers( URI source, Throwable e )
                            throws IOException, DBPoolException, SQLException, URISyntaxException {

        List<HarvestRepository.ResponseHandler> handlers = HarvestRepository.getInstance().getResponseHandlers( source );

        for ( HarvestRepository.ResponseHandler handler : handlers ) {
            String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
                                                 e.getMessage() );
            sendMessage( handler, message );
        }

    }

    /**
     * delivers a message to a response handler, either by e-mail or by HTTP GET depending on the kind of handler.
     * Delivery is best effort: failures are logged and never propagated to the caller.
     *
     * @param handler
     *            the receiver of the message
     * @param message
     *            the text to deliver
     */
    private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
        if ( handler.isMailAddress() ) {
            // strip the URI scheme (everything up to the first ':') to get the plain address
            String address = handler.getUri().toASCIIString();
            address = address.substring( address.indexOf( ":" ) + 1 );
            LOG.logDebug( "sending message", message );
            LOG.logDebug( StringTools.concat( 200, "informing response handler ", address, "via mail" ) );
            MailMessage mm = new EMailMessage( "info@lat-lon.de", address, "CS-W harvesting", message );
            try {
                MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
            } catch ( Exception e ) {
                LOG.logInfo( "could not send mail to admin:", e.getMessage() );
                LOG.logError( e.getMessage(), e );
            }
        } else {
            LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
            HttpClient client = new HttpClient();
            LOG.logDebug( message );

            GetMethod get = null;
            try {
                URL target = handler.getUri().toURL();
                client = WebUtils.enableProxyUsage( client, target );
                // the message contains blanks and colons, so it must be encoded to form a valid query string
                String encoded = URLEncoder.encode( message, "UTF-8" );
                get = new GetMethod( target.toExternalForm() + "?message=" + encoded );
                client.executeMethod( get );
            } catch ( Exception e ) {
                LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
                             + e.getMessage() );
            } finally {
                if ( get != null ) {
                    // hand the connection back to the client's connection manager
                    get.releaseConnection();
                }
            }

        }
    }

    /**
     * abstract super class for all harvest processors
     *
     *
     * @version $Revision: 21672 $
     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
     * @author last edited by: $Author: apoth $
     *
     * @version 1.0. $Revision: 21672 $, $Date: 2009-12-29 09:44:20 +0100 (Di, 29 Dez 2009) $
     *
     * @since 2.0
     */
    protected abstract class AbstractHarvestProcessor extends Thread {

        protected URI source = null;

        protected AbstractHarvester owner = null;

        /**
         * @param owner
         *            the harvester this processor works for
         * @param source
         *            the source to be processed
         */
        protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
            this.owner = owner;
            this.source = source;
        }

        /**
         * performs a transaction for inserting or updating a service meta data record in the catalogue a harvester
         * instance belongs to
         *
         * @param trans
         *            the transaction request as XML string
         * @throws SAXException
         * @throws IOException
         * @throws OGCWebServiceException
         */
        protected void performTransaction( String trans )
                                throws SAXException, IOException, OGCWebServiceException {

            XMLFragment xml = new XMLFragment();
            xml.load( new StringReader( trans ), XMLFragment.DEFAULT_URL );
            Transaction transaction = Transaction.create( "id", xml.getRootElement() );
            CSWFactory.getService().doService( transaction );

        }

        /**
         * creates a CSW Transaction including an Update operation for the passed meta data.
         *
         * @param identifier
         *            identifier of the record to be updated
         * @param xpath
         *            property the identifier is compared with
         * @param metaData
         *            the new content of the record
         * @return update request
         * @throws IOException
         */
        protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
                                throws IOException {
            // read template from file
            URL url = Templates.getTemplate( "Update_" + version );
            String update = FileUtils.readTextFile( url ).toString();

            // set metadata set to update by replacing the string '$metadata$'
            // in the template
            // TODO transform metaData into xml valid against the correct ISO AP version
            update = StringTools.replace( update, "$metadata$", withoutXmlDeclaration( metaData ), false );

            // set filter/constraint to identify the metadata set to be updated
            // by replacing the string '$constraints$' in the template
            String constraint = createConstraint( identifier, xpath ).toString();
            return StringTools.replace( update, "$constraints$", constraint, false );
        }

        /**
         * creates a transaction request including a delete operation to remove the metadata record with the passed
         * fileIdentifier from the catalogue
         *
         * @param identifier
         *            identifier of the record to be deleted
         * @return delete request
         * @throws IOException
         */
        protected String createDeleteRequest( String identifier )
                                throws IOException {
            // read template from file
            URL url = Templates.getTemplate( "Delete_" + version );
            String delete = FileUtils.readTextFile( url ).toString();

            String xpath = Messages.getString( "IdentifierDelete_" + version );
            String constraint = createConstraint( identifier, xpath ).toString();

            // set filter/constraint to identify the metadata set to be deleted
            // by replacing the string '$constraints$' in the template
            return StringTools.replace( delete, "$constraints$", constraint, false );
        }

        /**
         * a constraint for delete and update operations depends on the concrete metadata format. An implementing
         * class must consider this.
         *
         * @param fileIdentifier
         *            value to be compared
         * @param xpath
         *            comparable property
         * @return the constraint
         * @throws IOException
         */
        protected abstract String createConstraint( String fileIdentifier, String xpath )
                                throws IOException;

        /**
         * creates a CSW Transaction including an Insert operation for the passed meta data
         *
         * @param metaData
         *            the record to be inserted
         * @return the request
         * @throws IOException
         */
        protected String createInsertRequest( XMLFragment metaData )
                                throws IOException {
            // read template from file
            URL url = Templates.getTemplate( "Insert_" + version );
            String insert = FileUtils.readTextFile( url ).toString();

            // set metadata set to insert by replacing the string '$metadata$'
            // in the template
            // TODO transform metaData into xml valid against the correct ISO AP version
            return StringTools.replace( insert, "$metadata$", withoutXmlDeclaration( metaData ), false );

        }

        /**
         * serializes the passed document and cuts off everything up to and including the last '?&gt;', so the
         * result can be embedded as a string into a request template
         *
         * @param metaData
         *            the document to serialize
         * @return the serialized document without leading XML declaration section
         */
        private String withoutXmlDeclaration( XMLFragment metaData ) {
            String xml = metaData.getAsString();
            int end = xml.lastIndexOf( "?>" );
            if ( end > -1 ) {
                xml = xml.substring( end + 2 );
            }
            return xml;
        }

        /**
         * updates the source in the repository with the timestamp of the last harvesting
         *
         * @param source
         *            the harvested source
         * @param date
         *            the timestamp to store
         * @throws IOException
         * @throws SQLException
         * @throws DBPoolException
         */
        protected void writeLastHarvestingTimestamp( URI source, Date date )
                                throws IOException, DBPoolException, SQLException {
            HarvestRepository repository = HarvestRepository.getInstance();
            repository.setLastHarvestingTimestamp( source, date );
        }

        /**
         * updates the source in the repository with the timestamp when the next harvesting shall be performed. The
         * stored value is the passed date plus the harvest interval the repository returns for the source.
         *
         * @param source
         *            the harvested source
         * @param date
         *            the base timestamp the harvest interval is added to
         * @throws IOException
         * @throws SQLException
         * @throws DBPoolException
         */
        protected void writeNextHarvestingTimestamp( URI source, Date date )
                                throws IOException, DBPoolException, SQLException {
            HarvestRepository repository = HarvestRepository.getInstance();
            long interval = repository.getHarvestInterval( source );
            Date next = new Date( interval + date.getTime() );
            repository.setNextHarvestingTimestamp( source, next );
        }

    }

    /**
     * returns the XPath to the metadata record's identifier
     *
     * @param metaData
     *            the record; if <code>null</code> the ISO 19115 default is returned
     * @return the XPath to the metadata record's identifier
     */
    protected String getIdentifierXPath( XMLFragment metaData ) {
        // default is iso 19115
        return lookupIdentifierXPath( metaData, "Identifier_" );
    }

    /**
     * returns the XPath to the metadata record's identifier to be used within update operations
     *
     * @param metaData
     *            the record; if <code>null</code> the ISO 19115 default is returned
     * @return the XPath to the metadata record's identifier
     */
    protected String getIdentifierXPathForUpdate( XMLFragment metaData ) {
        return lookupIdentifierXPath( metaData, "IdentifierUpdate_" );
    }

    /**
     * reads the identifier XPath from the messages, using the key built from the passed prefix and the namespace
     * of the document's root element with every 'http://' removed
     *
     * @param metaData
     *            the record; may be <code>null</code>
     * @param keyPrefix
     *            prefix of the message key
     * @return the configured XPath or the ISO 19115 default if no document has been passed
     */
    private String lookupIdentifierXPath( XMLFragment metaData, String keyPrefix ) {
        if ( metaData == null ) {
            return DEFAULT_IDENTIFIER_XPATH;
        }
        String nspace = metaData.getRootElement().getNamespaceURI();
        nspace = StringTools.replace( nspace, "http://", "", true );
        return Messages.getString( keyPrefix + nspace );
    }

}