//$HeadURL: https://svn.wald.intevation.org/svn/deegree/base/branches/2.3_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
/*----------------------------------------------------------------------------
 This file is part of deegree, http://deegree.org/
 Copyright (C) 2001-2009 by:
 Department of Geography, University of Bonn
 and
 lat/lon GmbH

 This library is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License as published by the Free
 Software Foundation; either version 2.1 of the License, or (at your option)
 any later version.
 This library is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 details.
 You should have received a copy of the GNU Lesser General Public License
 along with this library; if not, write to the Free Software Foundation, Inc.,
 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

 Contact information:

 lat/lon GmbH
 Aennchenstr. 19, 53177 Bonn
 Germany
 http://lat-lon.de/

 Department of Geography, University of Bonn
 Prof. Dr. Klaus Greve
 Postfach 1147, 53001 Bonn
 Germany
 http://www.geographie.uni-bonn.de/deegree/

 e-mail: info@deegree.org
 ----------------------------------------------------------------------------*/
package org.deegree.ogcwebservices.csw.manager;

import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.deegree.enterprise.WebUtils;
import org.deegree.framework.log.ILogger;
import org.deegree.framework.log.LoggerFactory;
import org.deegree.framework.mail.EMailMessage;
import org.deegree.framework.mail.MailHelper;
import org.deegree.framework.mail.MailMessage;
import org.deegree.framework.util.FileUtils;
import org.deegree.framework.util.StringTools;
import org.deegree.framework.xml.NamespaceContext;
import org.deegree.framework.xml.XMLFragment;
import org.deegree.io.DBPoolException;
import org.deegree.ogcbase.CommonNamespaces;
import org.deegree.ogcwebservices.OGCWebServiceException;
import org.deegree.ogcwebservices.csw.CSWFactory;
import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
import org.xml.sax.SAXException;

/**
 * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester
 * shall be implemented. A concrete implementation of AbstractHarvester will be called within a
 * timer loop.
 *
 *
 * @version $Revision: 19475 $
 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
 * @author last edited by: $Author: lbuesching $
 *
 * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
 *
 * @since 2.0
 */
public abstract class AbstractHarvester extends TimerTask {

    private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );

    /** identifier XPath used when no metadata document is available to derive one from */
    private static final String DEFAULT_IDENTIFIER_XPATH = "iso19115:fileIdentifier/smXML:CharacterString";

    private boolean stopped = true;

    private Timer timer = null;

    protected List<URI> inProgress = new Vector<URI>();

    protected String version;

    protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
    static {
        try {
            nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
            nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
            nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
        } catch ( URISyntaxException e ) {
            // the URIs above are literals, so this is not expected; report it through the
            // logger instead of dumping the stack trace to stderr
            LOG.logError( e.getMessage(), e );
        }
    }

    /**
     * @param version
     *            the version of the CSW
     */
    protected AbstractHarvester( String version ) {
        this.version = version;
    }

    /**
     * adds a request to the harvesting process by storing it in the {@link HarvestRepository}
     *
     * @param request
     * @throws IOException
     * @throws SQLException
     * @throws DBPoolException
     */
    public void addRequest( Harvest request )
                            throws IOException, DBPoolException, SQLException {
        HarvestRepository.getInstance().storeRequest( request );
    }

    /**
     * returns true if the harvesting process is running
     *
     * @return <code>true</code> if the harvesting process is running
     */
    public boolean isRunning() {
        return !stopped;
    }

    /**
     * removes a request from the harvesting request.
     * <p>
     * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic
     * harvesting job, so this method can not be called with a standard OGC OWS request !!</b>
     * </p>
     *
     * @param request
     * @throws IOException
     * @throws SQLException
     * @throws DBPoolException
     */
    public void removeRequest( Harvest request )
                            throws IOException, DBPoolException, SQLException {
        HarvestRepository.getInstance().dropRequest( request.getSource() );
    }

    /**
     * starts the harvesting process: schedules this task on a new {@link Timer}, first run
     * immediately, then every 10 seconds.
     * <p>
     * NOTE(review): a {@link TimerTask} instance can only be scheduled once, so calling this
     * method a second time on the same instance is expected to fail with an
     * {@link IllegalStateException} raised by {@link Timer#schedule(TimerTask, long, long)} -
     * confirm whether callers ever restart a harvester.
     * </p>
     */
    public void startHarvesting() {
        timer = new Timer();
        timer.schedule( this, 0, 10000 );
        stopped = false;
        LOG.logInfo( "harvesting has been started" );
    }

    /**
     * stops the harvesting process. Calling this method on a harvester that has never been
     * started is safe and just marks it as stopped.
     */
    public void stopHarvesting() {
        if ( timer != null ) {
            // cancel() already discards every scheduled task, a preceding purge() is not needed
            timer.cancel();
            timer = null;
        }
        stopped = true;
        LOG.logInfo( "harvesting has been stopped" );
    }

    /**
     * informs all response handlers assigned to a source about successful harvesting of the source
     *
     * @param source
     * @throws IOException
     * @throws URISyntaxException
     * @throws SQLException
     * @throws DBPoolException
     */
    protected void informResponseHandlers( URI source )
                            throws IOException, DBPoolException, SQLException, URISyntaxException {

        List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );

        for ( HarvestRepository.ResponseHandler handler : list ) {
            String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
            sendMessage( handler, message );
        }

    }

    /**
     * returns true if the passed source shall be harvested. This is the case if the source is
     * not currently in progress, is of the passed target type, and
     * <ul>
     * <li>has not been harvested before, or</li>
     * <li>its next harvesting timestamp has been reached, or</li>
     * <li>the repository reports that harvesting shall be forced.</li>
     * </ul>
     * If the repository knows a last but no next harvesting timestamp, only the force flag
     * decides.
     *
     * @see HarvestRepository.ResourceType
     *
     * @param source
     * @param targetType
     *            the resource type the calling harvester is responsible for
     * @return <code>true</code> if the passed source shall be harvested
     * @throws IOException
     * @throws DBPoolException
     * @throws SQLException
     */
    protected boolean shallHarvest( URI source, ResourceType targetType )
                            throws IOException, DBPoolException, SQLException {

        if ( inProgress.contains( source ) ) {
            return false;
        }

        HarvestRepository repository = HarvestRepository.getInstance();

        ResourceType st = repository.getSourceType( source );

        if ( !st.equals( targetType ) ) {
            return false;
        }

        Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
        if ( lastHarvesting == null ) {
            // never harvested before; decide before touching the next timestamp, which may
            // not be available yet either
            return true;
        }

        Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
        if ( nextHarvesting != null && System.currentTimeMillis() >= nextHarvesting.getTime() ) {
            return true;
        }

        return repository.shallForceHarvesting( source );
    }

    /**
     * informs all response handlers assigned to a source about an exception that occurs when
     * harvesting a source
     *
     * @param source
     * @param e
     * @throws IOException
     * @throws URISyntaxException
     * @throws SQLException
     * @throws DBPoolException
     */
    protected void informResponseHandlers( URI source, Throwable e )
                            throws IOException, DBPoolException, SQLException, URISyntaxException {

        List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );

        for ( HarvestRepository.ResponseHandler handler : list ) {
            String message = StringTools.concat( 500, "exception occures harvesting source: ", source,
                                                 "; exception: ", e.getMessage() );
            sendMessage( handler, message );
        }

    }

    /**
     * delivers a message to one response handler, either by mail (if the handler is a mail
     * address) or by HTTP GET with the message passed as <code>message</code> query parameter.
     * Delivery is best effort: failures are logged and never propagated to the caller.
     *
     * @param handler
     * @param message
     */
    private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
        if ( handler.isMailAddress() ) {
            // strip the scheme part (everything up to the first ':') to get the plain address
            String s = handler.getUri().toASCIIString();
            int p = s.indexOf( ":" );
            s = s.substring( p + 1, s.length() );
            LOG.logDebug( "sending message", message );
            LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, " via mail" ) );
            MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
            try {
                MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
            } catch ( Exception e ) {
                LOG.logInfo( "could not send mail to admin:", e.getMessage() );
                LOG.logError( e.getMessage(), e );
            }
        } else {
            LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), " via HTTP GET" ) );
            HttpClient client = new HttpClient();
            LOG.logDebug( message );

            GetMethod get = null;
            try {
                URL url = handler.getUri().toURL();
                client = WebUtils.enableProxyUsage( client, url );
                // the message contains blanks and punctuation, so it must be encoded before it
                // becomes part of the request URI
                String encoded = URLEncoder.encode( message, "UTF-8" );
                // do not start a second query string if the handler URL already carries one
                String separator = url.getQuery() == null ? "?" : "&";
                get = new GetMethod( url.toExternalForm() + separator + "message=" + encoded );
                client.executeMethod( get );
            } catch ( Exception e ) {
                LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
                             + e.getMessage() );
            } finally {
                if ( get != null ) {
                    // hand the connection back to the connection manager
                    get.releaseConnection();
                }
            }

        }
    }

    /**
     * removes everything up to and including the last occurrence of <code>?&gt;</code> from
     * the passed XML string, so the remainder can be inserted as plain text into a request
     * template. Strings without <code>?&gt;</code> are returned unchanged.
     *
     * @param xml
     * @return the XML without its leading declaration part
     */
    private static String stripXmlDeclaration( String xml ) {
        int p = xml.lastIndexOf( "?>" );
        if ( p > -1 ) {
            return xml.substring( p + 2, xml.length() );
        }
        return xml;
    }

    /**
     * abstract super class for all harvest processors
     *
     *
     * @version $Revision: 19475 $
     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
     * @author last edited by: $Author: lbuesching $
     *
     * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
     *
     * @since 2.0
     */
    protected abstract class AbstractHarvestProcessor extends Thread {

        protected URI source = null;

        protected AbstractHarvester owner = null;

        /**
         * @param owner
         *            the harvester this processor works for
         * @param source
         *            the source to be harvested
         */
        protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
            this.owner = owner;
            this.source = source;
        }

        /**
         * performs a transaction for inserting or updating a service meta data record in the
         * catalogue a harvester instance belongs to
         *
         * @param trans
         *            the transaction request as XML string
         * @throws SAXException
         * @throws IOException
         * @throws OGCWebServiceException
         */
        protected void performTransaction( String trans )
                                throws SAXException, IOException, OGCWebServiceException {

            StringReader sr = new StringReader( trans );
            XMLFragment xml = new XMLFragment();
            xml.load( sr, XMLFragment.DEFAULT_URL );
            Transaction transaction = Transaction.create( "id", xml.getRootElement() );
            CSWFactory.getService().doService( transaction );

        }

        /**
         * creates a CSW Transaction including an Update operation for the passed meta data.
         *
         * @param identifier
         *            value identifying the metadata set to be updated
         * @param xpath
         *            property the identifier is compared with
         * @param metaData
         * @return update request
         * @throws IOException
         */
        protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
                                throws IOException {
            // read template from file
            URL url = Templates.getTemplate( "Update_" + version );
            String update = FileUtils.readTextFile( url ).toString();

            // remove XML declaration to enable inserting the metadata set as string
            // into the template
            // TODO transform metaData into xml valid against the correct ISO AP version
            String s = stripXmlDeclaration( metaData.getAsString() );

            // set metadata set to update by replacing the string '$metadata$'
            // in the template
            update = StringTools.replace( update, "$metadata$", s, false );
            s = createConstraint( identifier, xpath ).toString();

            // set filter/constraint to identify the metadata set to be updated
            // by replacing the string '$constraints$' in the template
            return StringTools.replace( update, "$constraints$", s, false );
        }

        /**
         * creates a transaction request including a delete operation to remove the metadata record
         * with the passed fileIdentifier from the catalogue. The property the identifier is
         * compared with is read from the messages using the key
         * <code>IdentifierDelete_&lt;version&gt;</code>.
         *
         * @param identifier
         * @return delete request
         * @throws IOException
         */
        protected String createDeleteRequest( String identifier )
                                throws IOException {
            // read template from file
            URL url = Templates.getTemplate( "Delete_" + version );
            String delete = FileUtils.readTextFile( url ).toString();

            String xpath = Messages.getString( "IdentifierDelete_" + version );
            String s = createConstraint( identifier, xpath ).toString();

            // set filter/constraint to identify the metadata set to be deleted
            // by replacing the string '$constraints$' in the template
            return StringTools.replace( delete, "$constraints$", s, false );
        }

        /**
         * a constraint for delete and update operation depends on concrete metadata format. An
         * implementing class must consider this.
         *
         * @param fileIdentifier
         *            value to be compared
         * @param xpath
         *            comparable property
         * @return the constraint
         * @throws IOException
         */
        protected abstract String createConstraint( String fileIdentifier, String xpath )
                                throws IOException;

        /**
         * creates a CSW Transaction including an Insert operation for the passed meta data
         *
         * @param metaData
         * @return the request
         * @throws IOException
         */
        protected String createInsertRequest( XMLFragment metaData )
                                throws IOException {
            // read template from file
            URL url = Templates.getTemplate( "Insert_" + version );
            String insert = FileUtils.readTextFile( url ).toString();

            // TODO transform metaData into xml valid against the correct ISO AP version
            String s = stripXmlDeclaration( metaData.getAsString() );

            // set metadata set to insert by replacing the string '$metadata$'
            // in the template
            return StringTools.replace( insert, "$metadata$", s, false );

        }

        /**
         * actualizes the source in the repository with timestamp of last harvesting
         *
         * @param source
         * @param date
         * @throws IOException
         * @throws SQLException
         * @throws DBPoolException
         */
        protected void writeLastHarvestingTimestamp( URI source, Date date )
                                throws IOException, DBPoolException, SQLException {
            HarvestRepository repository = HarvestRepository.getInstance();
            repository.setLastHarvestingTimestamp( source, date );
        }

        /**
         * actualizes the source in the repository with timestamp when next harvesting shall be
         * performed. The stored value is the passed date plus the harvest interval the
         * repository holds for the source.
         *
         * @param source
         * @param date
         *            the reference date the harvest interval is added to
         * @throws IOException
         * @throws SQLException
         * @throws DBPoolException
         */
        protected void writeNextHarvestingTimestamp( URI source, Date date )
                                throws IOException, DBPoolException, SQLException {
            HarvestRepository repository = HarvestRepository.getInstance();
            long ts = repository.getHarvestInterval( source );
            Date next = new Date( ts + date.getTime() );
            repository.setNextHarvestingTimestamp( source, next );
        }

    }

    /**
     * returns the XPath to the metadata records identifier
     *
     * @param metaData
     *            may be <code>null</code>; then the ISO 19115 default is returned
     * @return the XPath to the metadata records identifier
     */
    protected String getIdentifierXPath( XMLFragment metaData ) {
        // default is iso 19115
        return resolveIdentifierXPath( metaData, "Identifier_" );
    }

    /**
     * returns the XPath to the metadata records identifier to be used within update operations
     *
     * @param metaData
     *            may be <code>null</code>; then the ISO 19115 default is returned
     * @return the XPath to the metadata records identifier
     */
    protected String getIdentifierXPathForUpdate( XMLFragment metaData ) {
        return resolveIdentifierXPath( metaData, "IdentifierUpdate_" );
    }

    /**
     * looks up the identifier XPath for a metadata document from the messages. The message key
     * is the passed prefix followed by the namespace URI of the documents root element with
     * <code>http://</code> removed. Without a document, or if its root element has no
     * namespace, the ISO 19115 default is returned.
     *
     * @param metaData
     * @param keyPrefix
     * @return the XPath to the metadata records identifier
     */
    private String resolveIdentifierXPath( XMLFragment metaData, String keyPrefix ) {
        if ( metaData == null ) {
            return DEFAULT_IDENTIFIER_XPATH;
        }
        String nspace = metaData.getRootElement().getNamespaceURI();
        if ( nspace == null ) {
            // no namespace to build a message key from
            return DEFAULT_IDENTIFIER_XPATH;
        }
        nspace = StringTools.replace( nspace, "http://", "", true );
        return Messages.getString( keyPrefix + nspace );
    }

}