001 //$HeadURL: svn+ssh://jwilden@svn.wald.intevation.org/deegree/base/branches/2.5_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
002 /*----------------------------------------------------------------------------
003 This file is part of deegree, http://deegree.org/
004 Copyright (C) 2001-2009 by:
005 Department of Geography, University of Bonn
006 and
007 lat/lon GmbH
008
009 This library is free software; you can redistribute it and/or modify it under
010 the terms of the GNU Lesser General Public License as published by the Free
011 Software Foundation; either version 2.1 of the License, or (at your option)
012 any later version.
013 This library is distributed in the hope that it will be useful, but WITHOUT
014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
015 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
016 details.
017 You should have received a copy of the GNU Lesser General Public License
018 along with this library; if not, write to the Free Software Foundation, Inc.,
019 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020
021 Contact information:
022
023 lat/lon GmbH
024 Aennchenstr. 19, 53177 Bonn
025 Germany
026 http://lat-lon.de/
027
028 Department of Geography, University of Bonn
029 Prof. Dr. Klaus Greve
030 Postfach 1147, 53001 Bonn
031 Germany
032 http://www.geographie.uni-bonn.de/deegree/
033
034 e-mail: info@deegree.org
035 ----------------------------------------------------------------------------*/
036 package org.deegree.ogcwebservices.csw.manager;
037
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.deegree.enterprise.WebUtils;
import org.deegree.framework.log.ILogger;
import org.deegree.framework.log.LoggerFactory;
import org.deegree.framework.mail.EMailMessage;
import org.deegree.framework.mail.MailHelper;
import org.deegree.framework.mail.MailMessage;
import org.deegree.framework.util.FileUtils;
import org.deegree.framework.util.StringTools;
import org.deegree.framework.xml.NamespaceContext;
import org.deegree.framework.xml.XMLFragment;
import org.deegree.io.DBPoolException;
import org.deegree.ogcbase.CommonNamespaces;
import org.deegree.ogcwebservices.OGCWebServiceException;
import org.deegree.ogcwebservices.csw.CSWFactory;
import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
import org.xml.sax.SAXException;
070
071 /**
072 * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester shall be implemented. A
073 * concrete implementation of AbstractHarvester will be called within a timer loop.
074 *
075 *
076 * @version $Revision: 21672 $
077 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
078 * @author last edited by: $Author: apoth $
079 *
080 * @version 1.0. $Revision: 21672 $, $Date: 2009-12-29 09:44:20 +0100 (Di, 29 Dez 2009) $
081 *
082 * @since 2.0
083 */
084 public abstract class AbstractHarvester extends TimerTask {
085
086 private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );
087
088 private boolean stopped = true;
089
090 private Timer timer = null;
091
092 protected List<URI> inProgress = new Vector<URI>();
093
094 protected String version;
095
096 protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
097 static {
098 try {
099 nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
100 nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
101 nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
102 } catch ( URISyntaxException e ) {
103 e.printStackTrace();
104 }
105 }
106
107 /**
108 * @param version
109 * the version of the CSW
110 */
111 protected AbstractHarvester( String version ) {
112 this.version = version;
113 }
114
115 /**
116 * adds a request to the harvesting process
117 *
118 * @param request
119 * @throws IOException
120 * @throws SQLException
121 * @throws DBPoolException
122 */
123 public void addRequest( Harvest request )
124 throws IOException, DBPoolException, SQLException {
125 HarvestRepository.getInstance().storeRequest( request );
126 }
127
128 /**
129 * returns true if the harvesting process is running
130 *
131 * @return <code>true</code> if the harvesting process is running
132 */
133 public boolean isRunning() {
134 return !stopped;
135 }
136
137 /**
138 * removes a request from the harvesting request.
139 * <p>
140 * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic harvesting job, so this
141 * method can not be called with a standard OGC OWS request !!</b>
142 * </p>
143 *
144 * @param request
145 * @throws IOException
146 * @throws SQLException
147 * @throws DBPoolException
148 */
149 public void removeRequest( Harvest request )
150 throws IOException, DBPoolException, SQLException {
151 HarvestRepository.getInstance().dropRequest( request.getSource() );
152 }
153
154 /**
155 * starts the harvesting process
156 *
157 */
158 public void startHarvesting() {
159 timer = new Timer();
160 timer.schedule( this, 0, 10000 );
161 stopped = false;
162 LOG.logInfo( "harvesting has been started" );
163 }
164
165 /**
166 * stops the harvesting process
167 *
168 */
169 public void stopHarvesting() {
170 timer.purge();
171 timer.cancel();
172 stopped = true;
173 LOG.logInfo( "harvesting has been stopped" );
174 }
175
176 /**
177 * informs all response handlers assigend to a source about successful harvesting of the source
178 *
179 * @param source
180 * @throws URISyntaxException
181 * @throws SQLException
182 * @throws DBPoolException
183 * @throws MalformedURLException
184 */
185 protected void informResponseHandlers( URI source )
186 throws IOException, DBPoolException, SQLException, URISyntaxException {
187
188 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
189
190 for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
191 HarvestRepository.ResponseHandler handler = iter.next();
192 String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
193 sendMessage( handler, message );
194 }
195
196 }
197
198 /**
199 * returns true if the passed source shall be harvested. this is true if a source has not been harvested before or
200 * the next harvesting timestamp has been reached and the source is of type
201 *
202 * @see HarvestRepository.ResourceType service
203 *
204 * @param source
205 * @return <code>true</code> if the passed source shall be harvested
206 * @throws DBPoolException
207 * @throws SQLException
208 */
209 protected boolean shallHarvest( URI source, ResourceType targetType )
210 throws IOException, DBPoolException, SQLException {
211
212 if ( inProgress.contains( source ) ) {
213 return false;
214 }
215
216 HarvestRepository repository = HarvestRepository.getInstance();
217
218 ResourceType st = repository.getSourceType( source );
219
220 if ( !st.equals( targetType ) ) {
221 return false;
222 }
223
224 Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
225 Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
226
227 long tmp = System.currentTimeMillis() - nextHarvesting.getTime();
228 return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source );
229 }
230
231 /**
232 * informs all response handlers assigend to a source about an exception that occurs when harvesting a source
233 *
234 * @param source
235 * @param e
236 * @throws URISyntaxException
237 * @throws SQLException
238 * @throws DBPoolException
239 */
240 protected void informResponseHandlers( URI source, Throwable e )
241 throws IOException, DBPoolException, SQLException, URISyntaxException {
242
243 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
244
245 for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
246 HarvestRepository.ResponseHandler handler = iter.next();
247 String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
248 e.getMessage() );
249 sendMessage( handler, message );
250
251 }
252
253 }
254
255 /**
256 *
257 * @param handler
258 * @param message
259 */
260 private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
261 if ( handler.isMailAddress() ) {
262 String s = handler.getUri().toASCIIString();
263 int p = s.indexOf( ":" );
264 s = s.substring( p + 1, s.length() );
265 LOG.logDebug( "sending message", message );
266 LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) );
267 MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
268 try {
269 MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
270 } catch ( Exception e ) {
271 LOG.logInfo( "could not send mail to admin:", e.getMessage() );
272 LOG.logError( e.getMessage(), e );
273 }
274 } else {
275 LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
276 HttpClient client = new HttpClient();
277 LOG.logDebug( message );
278
279 try {
280 client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() );
281 GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message );
282 client.executeMethod( get );
283 } catch ( Exception e ) {
284 LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
285 + e.getMessage() );
286 }
287
288 }
289 }
290
291 /**
292 * abstract super class for all harvest processores
293 *
294 *
295 * @version $Revision: 21672 $
296 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
297 * @author last edited by: $Author: apoth $
298 *
299 * @version 1.0. $Revision: 21672 $, $Date: 2009-12-29 09:44:20 +0100 (Di, 29 Dez 2009) $
300 *
301 * @since 2.0
302 */
303 protected abstract class AbstractHarvestProcessor extends Thread {
304
305 protected URI source = null;
306
307 protected AbstractHarvester owner = null;
308
309 protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
310 this.owner = owner;
311 this.source = source;
312 }
313
314 /**
315 * performs a transaction for inserting or updating a service meta data record in the catalogue a harvester
316 * instance belongs too
317 *
318 * @param trans
319 * @throws SAXException
320 * @throws IOException
321 * @throws OGCWebServiceException
322 */
323 protected void performTransaction( String trans )
324 throws SAXException, IOException, OGCWebServiceException {
325
326 StringReader sr = new StringReader( trans );
327 XMLFragment xml = new XMLFragment();
328 xml.load( sr, XMLFragment.DEFAULT_URL );
329 Transaction transaction = Transaction.create( "id", xml.getRootElement() );
330 CSWFactory.getService().doService( transaction );
331
332 }
333
334 /**
335 * creates a CSW Transaction including an Update operation for the passed meta data.
336 *
337 * @param identifier
338 * @param xpath
339 * @param metaData
340 * @return update request
341 * @throws IOException
342 */
343 protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
344 throws IOException {
345 // read template from file
346 URL url = Templates.getTemplate( "Update_" + version );
347 String update = FileUtils.readTextFile( url ).toString();
348
349 // remove XML doctype section to enable inserting the metadata set as string
350 // into the template
351 // TODO transform metaData into xml valid against the correct ISO AP version
352 String s = metaData.getAsString();
353 int p = s.lastIndexOf( "?>" );
354 if ( p > -1 ) {
355 s = s.substring( p + 2, s.length() );
356 }
357
358 // set metadata set to update by replacing the string '$metadata$'
359 // in the template
360 update = StringTools.replace( update, "$metadata$", s, false );
361 s = createConstraint( identifier, xpath ).toString();
362
363 // set filter/constraint to identify the metadata set to be updated
364 // by replacing the string '$constraints$' in the template
365 return StringTools.replace( update, "$constraints$", s, false );
366 }
367
368 /**
369 * creates a transaction request including a delete operation to remove the metadata record with the passed
370 * fileIdentifier from the catalogue
371 *
372 * @param identifier
373 * @return delete request
374 * @throws IOException
375 */
376 protected String createDeleteRequest( String identifier )
377 throws IOException {
378 // read template from file
379 URL url = Templates.getTemplate( "Delete_" + version );
380 String delete = FileUtils.readTextFile( url ).toString();
381
382 String xpath = Messages.getString( "IdentifierDelete_" + version );
383 String s = createConstraint( identifier, xpath ).toString();
384
385 // set filter/constraint to identify the metadata set to be deleted
386 // by replacing the string '$constraints$' in the template
387 return StringTools.replace( delete, "$constraints$", s, false );
388 }
389
390 /**
391 * a constraint for delete und update operation depends on concrete metadata format. An implementing class must
392 * consider this.
393 *
394 * @param fileIdentifier
395 * value to be compared
396 * @param xpath
397 * comparable property
398 * @return the constraint
399 */
400 protected abstract String createConstraint( String fileIdentifier, String xpath )
401 throws IOException;
402
403 /**
404 * creates a CSW Transaction including an Update operation for the passed meta data
405 *
406 * @param metaData
407 * @return the request
408 * @throws IOException
409 */
410 protected String createInsertRequest( XMLFragment metaData )
411 throws IOException {
412 // read template from file
413 URL url = Templates.getTemplate( "Insert_" + version );
414 String insert = FileUtils.readTextFile( url ).toString();
415
416 // TODO transform metaData into xml valid against the correct ISO AP version
417 String s = metaData.getAsString();
418 int p = s.lastIndexOf( "?>" );
419 if ( p > -1 ) {
420 s = s.substring( p + 2, s.length() );
421 }
422
423 // set metadata set to insert by replacing the string '$metadata$'
424 // in the template
425 return StringTools.replace( insert, "$metadata$", s, false );
426
427 }
428
429 /**
430 * actualizes the source in the repository with timestamp of last harvesting
431 *
432 * @param source
433 * @param date
434 * @throws SQLException
435 * @throws DBPoolException
436 */
437 protected void writeLastHarvestingTimestamp( URI source, Date date )
438 throws IOException, DBPoolException, SQLException {
439 HarvestRepository repository = HarvestRepository.getInstance();
440 repository.setLastHarvestingTimestamp( source, date );
441 }
442
443 /**
444 * actualizes the source in the repository with timestamp when next harvesting shall be performed
445 *
446 * @param source
447 * @param date
448 * @throws SQLException
449 * @throws DBPoolException
450 */
451 protected void writeNextHarvestingTimestamp( URI source, Date date )
452 throws IOException, DBPoolException, SQLException {
453 HarvestRepository repository = HarvestRepository.getInstance();
454 long ts = repository.getHarvestInterval( source );
455 date = new Date( ts + date.getTime() );
456 repository.setNextHarvestingTimestamp( source, date );
457 }
458
459 }
460
461 /**
462 * returns the XPath the metadata records identifier
463 *
464 * @param metaData
465 * @return the XPath the metadata records identifier
466 */
467 protected String getIdentifierXPath( XMLFragment metaData ) {
468 // default is iso 19115
469 String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
470 if ( metaData != null ) {
471 String nspace = metaData.getRootElement().getNamespaceURI();
472 nspace = StringTools.replace( nspace, "http://", "", true );
473 xpath = Messages.getString( "Identifier_" + nspace );
474 }
475 return xpath;
476 }
477
478 /**
479 * returns the XPath the metadata records identifier
480 *
481 * @param metaData
482 * @return the XPath the metadata records identifier
483 */
484 protected String getIdentifierXPathForUpdate( XMLFragment metaData ) {
485 String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
486 if ( metaData != null ) {
487 String nspace = metaData.getRootElement().getNamespaceURI();
488 nspace = StringTools.replace( nspace, "http://", "", true );
489 xpath = Messages.getString( "IdentifierUpdate_" + nspace );
490 }
491 return xpath;
492 }
493
494 }