001    //$HeadURL: svn+ssh://jwilden@svn.wald.intevation.org/deegree/base/branches/2.5_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
002    /*----------------------------------------------------------------------------
003     This file is part of deegree, http://deegree.org/
004     Copyright (C) 2001-2009 by:
005     Department of Geography, University of Bonn
006     and
007     lat/lon GmbH
008    
009     This library is free software; you can redistribute it and/or modify it under
010     the terms of the GNU Lesser General Public License as published by the Free
011     Software Foundation; either version 2.1 of the License, or (at your option)
012     any later version.
013     This library is distributed in the hope that it will be useful, but WITHOUT
014     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
015     FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
016     details.
017     You should have received a copy of the GNU Lesser General Public License
018     along with this library; if not, write to the Free Software Foundation, Inc.,
019     59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020    
021     Contact information:
022    
023     lat/lon GmbH
024     Aennchenstr. 19, 53177 Bonn
025     Germany
026     http://lat-lon.de/
027    
028     Department of Geography, University of Bonn
029     Prof. Dr. Klaus Greve
030     Postfach 1147, 53001 Bonn
031     Germany
032     http://www.geographie.uni-bonn.de/deegree/
033    
034     e-mail: info@deegree.org
035     ----------------------------------------------------------------------------*/
036    package org.deegree.ogcwebservices.csw.manager;
037    
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.deegree.enterprise.WebUtils;
import org.deegree.framework.log.ILogger;
import org.deegree.framework.log.LoggerFactory;
import org.deegree.framework.mail.EMailMessage;
import org.deegree.framework.mail.MailHelper;
import org.deegree.framework.mail.MailMessage;
import org.deegree.framework.util.FileUtils;
import org.deegree.framework.util.StringTools;
import org.deegree.framework.xml.NamespaceContext;
import org.deegree.framework.xml.XMLFragment;
import org.deegree.io.DBPoolException;
import org.deegree.ogcbase.CommonNamespaces;
import org.deegree.ogcwebservices.OGCWebServiceException;
import org.deegree.ogcwebservices.csw.CSWFactory;
import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
import org.xml.sax.SAXException;
070    
071    /**
072     * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester shall be implemented. A
073     * concrete implementation of AbstractHarvester will be called within a timer loop.
074     * 
075     * 
076     * @version $Revision: 21672 $
077     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
078     * @author last edited by: $Author: apoth $
079     * 
080     * @version 1.0. $Revision: 21672 $, $Date: 2009-12-29 09:44:20 +0100 (Di, 29 Dez 2009) $
081     * 
082     * @since 2.0
083     */
084    public abstract class AbstractHarvester extends TimerTask {
085    
086        private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );
087    
088        private boolean stopped = true;
089    
090        private Timer timer = null;
091    
092        protected List<URI> inProgress = new Vector<URI>();
093    
094        protected String version;
095    
096        protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
097        static {
098            try {
099                nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
100                nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
101                nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
102            } catch ( URISyntaxException e ) {
103                e.printStackTrace();
104            }
105        }
106    
107        /**
108         * @param version
109         *            the version of the CSW
110         */
111        protected AbstractHarvester( String version ) {
112            this.version = version;
113        }
114    
115        /**
116         * adds a request to the harvesting process
117         * 
118         * @param request
119         * @throws IOException
120         * @throws SQLException
121         * @throws DBPoolException
122         */
123        public void addRequest( Harvest request )
124                                throws IOException, DBPoolException, SQLException {
125            HarvestRepository.getInstance().storeRequest( request );
126        }
127    
128        /**
129         * returns true if the harvesting process is running
130         * 
131         * @return <code>true</code> if the harvesting process is running
132         */
133        public boolean isRunning() {
134            return !stopped;
135        }
136    
137        /**
138         * removes a request from the harvesting request.
139         * <p>
140         * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic harvesting job, so this
141         * method can not be called with a standard OGC OWS request !!</b>
142         * </p>
143         * 
144         * @param request
145         * @throws IOException
146         * @throws SQLException
147         * @throws DBPoolException
148         */
149        public void removeRequest( Harvest request )
150                                throws IOException, DBPoolException, SQLException {
151            HarvestRepository.getInstance().dropRequest( request.getSource() );
152        }
153    
154        /**
155         * starts the harvesting process
156         * 
157         */
158        public void startHarvesting() {
159            timer = new Timer();
160            timer.schedule( this, 0, 10000 );
161            stopped = false;
162            LOG.logInfo( "harvesting has been started" );
163        }
164    
165        /**
166         * stops the harvesting process
167         * 
168         */
169        public void stopHarvesting() {
170            timer.purge();
171            timer.cancel();
172            stopped = true;
173            LOG.logInfo( "harvesting has been stopped" );
174        }
175    
176        /**
177         * informs all response handlers assigend to a source about successful harvesting of the source
178         * 
179         * @param source
180         * @throws URISyntaxException
181         * @throws SQLException
182         * @throws DBPoolException
183         * @throws MalformedURLException
184         */
185        protected void informResponseHandlers( URI source )
186                                throws IOException, DBPoolException, SQLException, URISyntaxException {
187    
188            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
189    
190            for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
191                HarvestRepository.ResponseHandler handler = iter.next();
192                String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
193                sendMessage( handler, message );
194            }
195    
196        }
197    
198        /**
199         * returns true if the passed source shall be harvested. this is true if a source has not been harvested before or
200         * the next harvesting timestamp has been reached and the source is of type
201         * 
202         * @see HarvestRepository.ResourceType service
203         * 
204         * @param source
205         * @return <code>true</code> if the passed source shall be harvested
206         * @throws DBPoolException
207         * @throws SQLException
208         */
209        protected boolean shallHarvest( URI source, ResourceType targetType )
210                                throws IOException, DBPoolException, SQLException {
211    
212            if ( inProgress.contains( source ) ) {
213                return false;
214            }
215    
216            HarvestRepository repository = HarvestRepository.getInstance();
217    
218            ResourceType st = repository.getSourceType( source );
219    
220            if ( !st.equals( targetType ) ) {
221                return false;
222            }
223    
224            Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
225            Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
226    
227            long tmp = System.currentTimeMillis() - nextHarvesting.getTime();
228            return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source );
229        }
230    
231        /**
232         * informs all response handlers assigend to a source about an exception that occurs when harvesting a source
233         * 
234         * @param source
235         * @param e
236         * @throws URISyntaxException
237         * @throws SQLException
238         * @throws DBPoolException
239         */
240        protected void informResponseHandlers( URI source, Throwable e )
241                                throws IOException, DBPoolException, SQLException, URISyntaxException {
242    
243            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
244    
245            for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
246                HarvestRepository.ResponseHandler handler = iter.next();
247                String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
248                                                     e.getMessage() );
249                sendMessage( handler, message );
250    
251            }
252    
253        }
254    
255        /**
256         * 
257         * @param handler
258         * @param message
259         */
260        private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
261            if ( handler.isMailAddress() ) {
262                String s = handler.getUri().toASCIIString();
263                int p = s.indexOf( ":" );
264                s = s.substring( p + 1, s.length() );
265                LOG.logDebug( "sending message", message );
266                LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) );
267                MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
268                try {
269                    MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
270                } catch ( Exception e ) {
271                    LOG.logInfo( "could not send mail to admin:", e.getMessage() );
272                    LOG.logError( e.getMessage(), e );
273                }
274            } else {
275                LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
276                HttpClient client = new HttpClient();
277                LOG.logDebug( message );
278    
279                try {
280                    client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() );
281                    GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message );
282                    client.executeMethod( get );
283                } catch ( Exception e ) {
284                    LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
285                                 + e.getMessage() );
286                }
287    
288            }
289        }
290    
291        /**
292         * abstract super class for all harvest processores
293         * 
294         * 
295         * @version $Revision: 21672 $
296         * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
297         * @author last edited by: $Author: apoth $
298         * 
299         * @version 1.0. $Revision: 21672 $, $Date: 2009-12-29 09:44:20 +0100 (Di, 29 Dez 2009) $
300         * 
301         * @since 2.0
302         */
303        protected abstract class AbstractHarvestProcessor extends Thread {
304    
305            protected URI source = null;
306    
307            protected AbstractHarvester owner = null;
308    
309            protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
310                this.owner = owner;
311                this.source = source;
312            }
313    
314            /**
315             * performs a transaction for inserting or updating a service meta data record in the catalogue a harvester
316             * instance belongs too
317             * 
318             * @param trans
319             * @throws SAXException
320             * @throws IOException
321             * @throws OGCWebServiceException
322             */
323            protected void performTransaction( String trans )
324                                    throws SAXException, IOException, OGCWebServiceException {
325    
326                StringReader sr = new StringReader( trans );
327                XMLFragment xml = new XMLFragment();
328                xml.load( sr, XMLFragment.DEFAULT_URL );
329                Transaction transaction = Transaction.create( "id", xml.getRootElement() );
330                CSWFactory.getService().doService( transaction );
331    
332            }
333    
334            /**
335             * creates a CSW Transaction including an Update operation for the passed meta data.
336             * 
337             * @param identifier
338             * @param xpath
339             * @param metaData
340             * @return update request
341             * @throws IOException
342             */
343            protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
344                                    throws IOException {
345                // read template from file
346                URL url = Templates.getTemplate( "Update_" + version );
347                String update = FileUtils.readTextFile( url ).toString();
348    
349                // remove XML doctype section to enable inserting the metadata set as string
350                // into the template
351                // TODO transform metaData into xml valid against the correct ISO AP version
352                String s = metaData.getAsString();
353                int p = s.lastIndexOf( "?>" );
354                if ( p > -1 ) {
355                    s = s.substring( p + 2, s.length() );
356                }
357    
358                // set metadata set to update by replacing the string '$metadata$'
359                // in the template
360                update = StringTools.replace( update, "$metadata$", s, false );
361                s = createConstraint( identifier, xpath ).toString();
362    
363                // set filter/constraint to identify the metadata set to be updated
364                // by replacing the string '$constraints$' in the template
365                return StringTools.replace( update, "$constraints$", s, false );
366            }
367    
368            /**
369             * creates a transaction request including a delete operation to remove the metadata record with the passed
370             * fileIdentifier from the catalogue
371             * 
372             * @param identifier
373             * @return delete request
374             * @throws IOException
375             */
376            protected String createDeleteRequest( String identifier )
377                                    throws IOException {
378                // read template from file
379                URL url = Templates.getTemplate( "Delete_" + version );
380                String delete = FileUtils.readTextFile( url ).toString();
381    
382                String xpath = Messages.getString( "IdentifierDelete_" + version );
383                String s = createConstraint( identifier, xpath ).toString();
384    
385                // set filter/constraint to identify the metadata set to be deleted
386                // by replacing the string '$constraints$' in the template
387                return StringTools.replace( delete, "$constraints$", s, false );
388            }
389    
390            /**
391             * a constraint for delete und update operation depends on concrete metadata format. An implementing class must
392             * consider this.
393             * 
394             * @param fileIdentifier
395             *            value to be compared
396             * @param xpath
397             *            comparable property
398             * @return the constraint
399             */
400            protected abstract String createConstraint( String fileIdentifier, String xpath )
401                                    throws IOException;
402    
403            /**
404             * creates a CSW Transaction including an Update operation for the passed meta data
405             * 
406             * @param metaData
407             * @return the request
408             * @throws IOException
409             */
410            protected String createInsertRequest( XMLFragment metaData )
411                                    throws IOException {
412                // read template from file
413                URL url = Templates.getTemplate( "Insert_" + version );
414                String insert = FileUtils.readTextFile( url ).toString();
415    
416                // TODO transform metaData into xml valid against the correct ISO AP version
417                String s = metaData.getAsString();
418                int p = s.lastIndexOf( "?>" );
419                if ( p > -1 ) {
420                    s = s.substring( p + 2, s.length() );
421                }
422    
423                // set metadata set to insert by replacing the string '$metadata$'
424                // in the template
425                return StringTools.replace( insert, "$metadata$", s, false );
426    
427            }
428    
429            /**
430             * actualizes the source in the repository with timestamp of last harvesting
431             * 
432             * @param source
433             * @param date
434             * @throws SQLException
435             * @throws DBPoolException
436             */
437            protected void writeLastHarvestingTimestamp( URI source, Date date )
438                                    throws IOException, DBPoolException, SQLException {
439                HarvestRepository repository = HarvestRepository.getInstance();
440                repository.setLastHarvestingTimestamp( source, date );
441            }
442    
443            /**
444             * actualizes the source in the repository with timestamp when next harvesting shall be performed
445             * 
446             * @param source
447             * @param date
448             * @throws SQLException
449             * @throws DBPoolException
450             */
451            protected void writeNextHarvestingTimestamp( URI source, Date date )
452                                    throws IOException, DBPoolException, SQLException {
453                HarvestRepository repository = HarvestRepository.getInstance();
454                long ts = repository.getHarvestInterval( source );
455                date = new Date( ts + date.getTime() );
456                repository.setNextHarvestingTimestamp( source, date );
457            }
458    
459        }
460    
461        /**
462         * returns the XPath the metadata records identifier
463         * 
464         * @param metaData
465         * @return the XPath the metadata records identifier
466         */
467        protected String getIdentifierXPath( XMLFragment metaData ) {
468            // default is iso 19115
469            String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
470            if ( metaData != null ) {
471                String nspace = metaData.getRootElement().getNamespaceURI();
472                nspace = StringTools.replace( nspace, "http://", "", true );
473                xpath = Messages.getString( "Identifier_" + nspace );
474            }
475            return xpath;
476        }
477    
478        /**
479         * returns the XPath the metadata records identifier
480         * 
481         * @param metaData
482         * @return the XPath the metadata records identifier
483         */
484        protected String getIdentifierXPathForUpdate( XMLFragment metaData ) {
485            String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
486            if ( metaData != null ) {
487                String nspace = metaData.getRootElement().getNamespaceURI();
488                nspace = StringTools.replace( nspace, "http://", "", true );
489                xpath = Messages.getString( "IdentifierUpdate_" + nspace );
490            }
491            return xpath;
492        }
493    
494    }