001 //$HeadURL: https://svn.wald.intevation.org/svn/deegree/base/branches/2.3_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
002 /*----------------------------------------------------------------------------
003 This file is part of deegree, http://deegree.org/
004 Copyright (C) 2001-2009 by:
005 Department of Geography, University of Bonn
006 and
007 lat/lon GmbH
008
009 This library is free software; you can redistribute it and/or modify it under
010 the terms of the GNU Lesser General Public License as published by the Free
011 Software Foundation; either version 2.1 of the License, or (at your option)
012 any later version.
013 This library is distributed in the hope that it will be useful, but WITHOUT
014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
015 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
016 details.
017 You should have received a copy of the GNU Lesser General Public License
018 along with this library; if not, write to the Free Software Foundation, Inc.,
019 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020
021 Contact information:
022
023 lat/lon GmbH
024 Aennchenstr. 19, 53177 Bonn
025 Germany
026 http://lat-lon.de/
027
028 Department of Geography, University of Bonn
029 Prof. Dr. Klaus Greve
030 Postfach 1147, 53001 Bonn
031 Germany
032 http://www.geographie.uni-bonn.de/deegree/
033
034 e-mail: info@deegree.org
035 ----------------------------------------------------------------------------*/
036 package org.deegree.ogcwebservices.csw.manager;
037
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.deegree.enterprise.WebUtils;
import org.deegree.framework.log.ILogger;
import org.deegree.framework.log.LoggerFactory;
import org.deegree.framework.mail.EMailMessage;
import org.deegree.framework.mail.MailHelper;
import org.deegree.framework.mail.MailMessage;
import org.deegree.framework.util.FileUtils;
import org.deegree.framework.util.StringTools;
import org.deegree.framework.xml.NamespaceContext;
import org.deegree.framework.xml.XMLFragment;
import org.deegree.io.DBPoolException;
import org.deegree.ogcbase.CommonNamespaces;
import org.deegree.ogcwebservices.OGCWebServiceException;
import org.deegree.ogcwebservices.csw.CSWFactory;
import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
import org.xml.sax.SAXException;
070
071 /**
072 * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester
073 * shall be implemented. A concrete implementation of AbstractHarvester will be called within a
074 * timer loop.
075 *
076 *
077 * @version $Revision: 19475 $
078 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
079 * @author last edited by: $Author: lbuesching $
080 *
081 * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
082 *
083 * @since 2.0
084 */
085 public abstract class AbstractHarvester extends TimerTask {
086
/** Logger used by the harvester. */
private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );

/** <code>true</code> while harvesting is not running; {@link #isRunning()} returns its negation. */
private boolean stopped = true;

/** Timer that drives the harvesting loop; stays <code>null</code> until harvesting is started. */
private Timer timer = null;

/** Sources for which {@link #shallHarvest(URI, ResourceType)} must answer <code>false</code>. */
protected List<URI> inProgress = new Vector<URI>();

/** Version string passed to the constructor; used to select version specific templates. */
protected String version;

/**
 * Namespace context obtained from {@link CommonNamespaces}, extended in the static initializer
 * with the prefixes <code>smXML</code>, <code>iso19119</code> and <code>iso19115</code>.
 */
protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
static {
    try {
        nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
        nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
        nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
    } catch ( URISyntaxException e ) {
        // report through the logger instead of printing to stderr; LOG is declared above and
        // therefore already initialized when this block runs
        LOG.logError( e.getMessage(), e );
    }
}
107
/**
 * Creates a harvester bound to one CSW version.
 *
 * @param version
 *            the version of the CSW; stored in the <code>version</code> field
 */
protected AbstractHarvester( final String version ) {
    super();
    this.version = version;
}
115
116 /**
117 * adds a request to the harvesting process
118 *
119 * @param request
120 * @throws IOException
121 * @throws SQLException
122 * @throws DBPoolException
123 */
124 public void addRequest( Harvest request )
125 throws IOException, DBPoolException, SQLException {
126 HarvestRepository.getInstance().storeRequest( request );
127 }
128
129 /**
130 * returns true if the harvesting process is running
131 *
132 * @return <code>true</code> if the harvesting process is running
133 */
134 public boolean isRunning() {
135 return !stopped;
136 }
137
138 /**
139 * removes a request from the harvesting request.
140 * <p>
141 * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic
142 * harvesting job, so this method can not be called with a standard OGC OWS request !!</b>
143 * </p>
144 *
145 * @param request
146 * @throws IOException
147 * @throws SQLException
148 * @throws DBPoolException
149 */
150 public void removeRequest( Harvest request )
151 throws IOException, DBPoolException, SQLException {
152 HarvestRepository.getInstance().dropRequest( request.getSource() );
153 }
154
155 /**
156 * starts the harvesting process
157 *
158 */
159 public void startHarvesting() {
160 timer = new Timer();
161 timer.schedule( this, 0, 10000 );
162 stopped = false;
163 LOG.logInfo( "harvesting has been started" );
164 }
165
166 /**
167 * stops the harvesting process
168 *
169 */
170 public void stopHarvesting() {
171 timer.purge();
172 timer.cancel();
173 stopped = true;
174 LOG.logInfo( "harvesting has been stopped" );
175 }
176
177 /**
178 * informs all response handlers assigend to a source about successful harvesting of the source
179 *
180 * @param source
181 * @throws URISyntaxException
182 * @throws SQLException
183 * @throws DBPoolException
184 * @throws MalformedURLException
185 */
186 protected void informResponseHandlers( URI source )
187 throws IOException, DBPoolException, SQLException, URISyntaxException {
188
189 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
190
191 for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
192 HarvestRepository.ResponseHandler handler = iter.next();
193 String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
194 sendMessage( handler, message );
195 }
196
197 }
198
199 /**
200 * returns true if the passed source shall be harvested. this is true if a source has not been
201 * harvested before or the next harvesting timestamp has been reached and the source is of type
202 *
203 * @see HarvestRepository.ResourceType service
204 *
205 * @param source
206 * @return <code>true</code> if the passed source shall be harvested
207 * @throws DBPoolException
208 * @throws SQLException
209 */
210 protected boolean shallHarvest( URI source, ResourceType targetType )
211 throws IOException, DBPoolException, SQLException {
212
213 if ( inProgress.contains( source ) ) {
214 return false;
215 }
216
217 HarvestRepository repository = HarvestRepository.getInstance();
218
219 ResourceType st = repository.getSourceType( source );
220
221 if ( !st.equals( targetType ) ) {
222 return false;
223 }
224
225 Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
226 Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
227
228 long tmp = System.currentTimeMillis() - nextHarvesting.getTime();
229 return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source );
230 }
231
232 /**
233 * informs all response handlers assigend to a source about an exception that occurs when
234 * harvesting a source
235 *
236 * @param source
237 * @param e
238 * @throws URISyntaxException
239 * @throws SQLException
240 * @throws DBPoolException
241 */
242 protected void informResponseHandlers( URI source, Throwable e )
243 throws IOException, DBPoolException, SQLException, URISyntaxException {
244
245 List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
246
247 for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
248 HarvestRepository.ResponseHandler handler = iter.next();
249 String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
250 e.getMessage() );
251 sendMessage( handler, message );
252
253 }
254
255 }
256
257 /**
258 *
259 * @param handler
260 * @param message
261 */
262 private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
263 if ( handler.isMailAddress() ) {
264 String s = handler.getUri().toASCIIString();
265 int p = s.indexOf( ":" );
266 s = s.substring( p + 1, s.length() );
267 LOG.logDebug( "sending message", message );
268 LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) );
269 MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
270 try {
271 MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
272 } catch ( Exception e ) {
273 LOG.logInfo( "could not send mail to admin:", e.getMessage() );
274 LOG.logError( e.getMessage(), e );
275 }
276 } else {
277 LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
278 HttpClient client = new HttpClient();
279 LOG.logDebug( message );
280
281 try {
282 client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() );
283 GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message );
284 client.executeMethod( get );
285 } catch ( Exception e ) {
286 LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
287 + e.getMessage() );
288 }
289
290 }
291 }
292
293 /**
294 * abstract super class for all harvest processores
295 *
296 *
297 * @version $Revision: 19475 $
298 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
299 * @author last edited by: $Author: lbuesching $
300 *
301 * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
302 *
303 * @since 2.0
304 */
305 protected abstract class AbstractHarvestProcessor extends Thread {
306
307 protected URI source = null;
308
309 protected AbstractHarvester owner = null;
310
311 protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
312 this.owner = owner;
313 this.source = source;
314 }
315
316 /**
317 * performs a transaction for inserting or updating a service meta data record in the
318 * catalogue a harvester instance belongs too
319 *
320 * @param trans
321 * @throws SAXException
322 * @throws IOException
323 * @throws OGCWebServiceException
324 */
325 protected void performTransaction( String trans )
326 throws SAXException, IOException, OGCWebServiceException {
327
328 StringReader sr = new StringReader( trans );
329 XMLFragment xml = new XMLFragment();
330 xml.load( sr, XMLFragment.DEFAULT_URL );
331 Transaction transaction = Transaction.create( "id", xml.getRootElement() );
332 CSWFactory.getService().doService( transaction );
333
334 }
335
336 /**
337 * creates a CSW Transaction including an Update operation for the passed meta data.
338 *
339 * @param identifier
340 * @param xpath
341 * @param metaData
342 * @return update request
343 * @throws IOException
344 */
345 protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
346 throws IOException {
347 // read template from file
348 URL url = Templates.getTemplate( "Update_" + version );
349 String update = FileUtils.readTextFile( url ).toString();
350
351 // remove XML doctype section to enable inserting the metadata set as string
352 // into the template
353 // TODO transform metaData into xml valid against the correct ISO AP version
354 String s = metaData.getAsString();
355 int p = s.lastIndexOf( "?>" );
356 if ( p > -1 ) {
357 s = s.substring( p + 2, s.length() );
358 }
359
360 // set metadata set to update by replacing the string '$metadata$'
361 // in the template
362 update = StringTools.replace( update, "$metadata$", s, false );
363 s = createConstraint( identifier, xpath ).toString();
364
365 // set filter/constraint to identify the metadata set to be updated
366 // by replacing the string '$constraints$' in the template
367 return StringTools.replace( update, "$constraints$", s, false );
368 }
369
370 /**
371 * creates a transaction request including a delete operation to remove the metadata record
372 * with the passed fileIdentifier from the catalogue
373 *
374 * @param identifier
375 * @param xpath
376 * @return delete request
377 * @throws IOException
378 */
379 protected String createDeleteRequest( String identifier )
380 throws IOException {
381 // read template from file
382 URL url = Templates.getTemplate( "Delete_" + version );
383 String delete = FileUtils.readTextFile( url ).toString();
384
385 String xpath = Messages.getString( "IdentifierDelete_" + version );
386 String s = createConstraint( identifier, xpath ).toString();
387
388 // set filter/constraint to identify the metadata set to be deleted
389 // by replacing the string '$constraints$' in the template
390 return StringTools.replace( delete, "$constraints$", s, false );
391 }
392
393 /**
394 * a constraint for delete und update operation depends on concrete metadata format. An
395 * implementing class must consider this.
396 *
397 * @param fileIdentifier
398 * value to be compared
399 * @param xpath
400 * comparable property
401 * @return the constraint
402 */
403 protected abstract String createConstraint( String fileIdentifier, String xpath )
404 throws IOException;
405
406 /**
407 * creates a CSW Transaction including an Update operation for the passed meta data
408 *
409 * @param metaData
410 * @return the request
411 * @throws IOException
412 */
413 protected String createInsertRequest( XMLFragment metaData )
414 throws IOException {
415 // read template from file
416 URL url = Templates.getTemplate( "Insert_" + version );
417 String insert = FileUtils.readTextFile( url ).toString();
418
419 // TODO transform metaData into xml valid against the correct ISO AP version
420 String s = metaData.getAsString();
421 int p = s.lastIndexOf( "?>" );
422 if ( p > -1 ) {
423 s = s.substring( p + 2, s.length() );
424 }
425
426 // set metadata set to insert by replacing the string '$metadata$'
427 // in the template
428 return StringTools.replace( insert, "$metadata$", s, false );
429
430 }
431
432 /**
433 * actualizes the source in the repository with timestamp of last harvesting
434 *
435 * @param source
436 * @param date
437 * @throws SQLException
438 * @throws DBPoolException
439 */
440 protected void writeLastHarvestingTimestamp( URI source, Date date )
441 throws IOException, DBPoolException, SQLException {
442 HarvestRepository repository = HarvestRepository.getInstance();
443 repository.setLastHarvestingTimestamp( source, date );
444 }
445
446 /**
447 * actualizes the source in the repository with timestamp when next harvesting shall be
448 * performed
449 *
450 * @param source
451 * @param date
452 * @throws SQLException
453 * @throws DBPoolException
454 */
455 protected void writeNextHarvestingTimestamp( URI source, Date date )
456 throws IOException, DBPoolException, SQLException {
457 HarvestRepository repository = HarvestRepository.getInstance();
458 long ts = repository.getHarvestInterval( source );
459 date = new Date( ts + date.getTime() );
460 repository.setNextHarvestingTimestamp( source, date );
461 }
462
463 }
464
465 /**
466 * returns the XPath the metadata records identifier
467 *
468 * @param metaData
469 * @return the XPath the metadata records identifier
470 */
471 protected String getIdentifierXPath( XMLFragment metaData ) {
472 // default is iso 19115
473 String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
474 if ( metaData != null ) {
475 String nspace = metaData.getRootElement().getNamespaceURI();
476 nspace = StringTools.replace( nspace, "http://", "", true );
477 xpath = Messages.getString( "Identifier_" + nspace );
478 }
479 return xpath;
480 }
481
482 /**
483 * returns the XPath the metadata records identifier
484 *
485 * @param metaData
486 * @return the XPath the metadata records identifier
487 */
488 protected String getIdentifierXPathForUpdate( XMLFragment metaData ) {
489 String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
490 if ( metaData != null ) {
491 String nspace = metaData.getRootElement().getNamespaceURI();
492 nspace = StringTools.replace( nspace, "http://", "", true );
493 xpath = Messages.getString( "IdentifierUpdate_" + nspace );
494 }
495 return xpath;
496 }
497
498 }