001    //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
002    /*----------------    FILE HEADER  ------------------------------------------
003    
004     This file is part of deegree.
005     Copyright (C) 2001-2008 by:
006     EXSE, Department of Geography, University of Bonn
007     http://www.giub.uni-bonn.de/deegree/
008     lat/lon GmbH
009     http://www.lat-lon.de
010    
011     This library is free software; you can redistribute it and/or
012     modify it under the terms of the GNU Lesser General Public
013     License as published by the Free Software Foundation; either
014     version 2.1 of the License, or (at your option) any later version.
015    
016     This library is distributed in the hope that it will be useful,
017     but WITHOUT ANY WARRANTY; without even the implied warranty of
018     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
019     Lesser General Public License for more details.
020    
021     You should have received a copy of the GNU Lesser General Public
022     License along with this library; if not, write to the Free Software
023     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
024    
025     Contact:
026    
027     Andreas Poth
028     lat/lon GmbH
029     Aennchenstr. 19
030     53115 Bonn
031     Germany
032     E-Mail: poth@lat-lon.de
033    
034     Prof. Dr. Klaus Greve
035     Department of Geography
036     University of Bonn
037     Meckenheimer Allee 166
038     53115 Bonn
039     Germany
040     E-Mail: greve@giub.uni-bonn.de
041    
042     
043     ---------------------------------------------------------------------------*/
044    package org.deegree.ogcwebservices.csw.manager;
045    
046    import java.io.IOException;
047    import java.io.StringReader;
048    import java.net.MalformedURLException;
049    import java.net.URI;
050    import java.net.URISyntaxException;
051    import java.net.URL;
052    import java.sql.SQLException;
053    import java.util.Date;
054    import java.util.Iterator;
055    import java.util.List;
056    import java.util.Timer;
057    import java.util.TimerTask;
058    import java.util.Vector;
059    
060    import org.apache.commons.httpclient.HttpClient;
061    import org.apache.commons.httpclient.HttpException;
062    import org.apache.commons.httpclient.methods.GetMethod;
063    import org.deegree.enterprise.WebUtils;
064    import org.deegree.framework.log.ILogger;
065    import org.deegree.framework.log.LoggerFactory;
066    import org.deegree.framework.mail.EMailMessage;
067    import org.deegree.framework.mail.MailHelper;
068    import org.deegree.framework.mail.MailMessage;
069    import org.deegree.framework.mail.SendMailException;
070    import org.deegree.framework.util.FileUtils;
071    import org.deegree.framework.util.StringTools;
072    import org.deegree.framework.xml.NamespaceContext;
073    import org.deegree.framework.xml.XMLFragment;
074    import org.deegree.framework.xml.XMLParsingException;
075    import org.deegree.io.DBPoolException;
076    import org.deegree.ogcbase.CommonNamespaces;
077    import org.deegree.ogcwebservices.OGCWebServiceException;
078    import org.deegree.ogcwebservices.csw.CSWFactory;
079    import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
080    import org.xml.sax.SAXException;
081    
082    /**
083     * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester
084     * shall be implemented. A concrete implementation of AbstractHarvester will be called within a
085     * timer loop.
086     * 
087     * 
088     * @version $Revision: 9345 $
089     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
090     * @author last edited by: $Author: apoth $
091     * 
092     * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
093     * 
094     * @since 2.0
095     */
096    public abstract class AbstractHarvester extends TimerTask {
097    
098        private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );
099    
100        private boolean stopped = true;
101    
102        private Timer timer = null;
103    
104        protected List<URI> inProgress = new Vector<URI>();
105    
106        protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
107        static {
108            try {
109                nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
110                nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
111                nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
112            } catch ( URISyntaxException e ) {
113                e.printStackTrace();
114            }
115        }
116    
117        /**
118         * adds a request to the harvesting process
119         * 
120         * @param request
121         * @throws SQLException
122         * @throws DBPoolException
123         */
124        public void addRequest( Harvest request )
125                                throws IOException, DBPoolException, SQLException {
126            HarvestRepository.getInstance().storeRequest( request );
127        }
128    
129        /**
130         * returns true if the harvesting process is running
131         * 
132         * @return <code>true</code> if the harvesting process is running
133         */
134        public boolean isRunning() {
135            return !stopped;
136        }
137    
138        /**
139         * removes a request from the harvesting request.
140         * <p>
141         * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic
142         * harvesting job, so this method can not be called with a standard OGC OWS request !!</b>
143         * </p>
144         * 
145         * @param request
146         * @throws SQLException
147         * @throws DBPoolException
148         */
149        public void removeRequest( Harvest request )
150                                throws IOException, DBPoolException, SQLException {
151            HarvestRepository.getInstance().dropRequest( request.getSource() );
152        }
153    
154        /**
155         * starts the harvesting process
156         * 
157         */
158        public void startHarvesting() {
159            timer = new Timer();
160            timer.schedule( this, 0, 10000 );
161            stopped = false;
162            LOG.logInfo( "harvesting has been started" );
163        }
164    
165        /**
166         * stops the harvesting process
167         * 
168         */
169        public void stopHarvesting() {
170            timer.purge();
171            timer.cancel();
172            stopped = true;
173            LOG.logInfo( "harvesting has been stopped" );
174        }
175    
176        /**
177         * informs all response handlers assigend to a source about successful harvesting of the source
178         * 
179         * @param source
180         * @throws URISyntaxException
181         * @throws SQLException
182         * @throws DBPoolException
183         * @throws MalformedURLException
184         */
185        protected void informResponseHandlers( URI source )
186                                throws IOException, DBPoolException, SQLException, URISyntaxException {
187    
188            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
189    
190            for ( Iterator iter = list.iterator(); iter.hasNext(); ) {
191                HarvestRepository.ResponseHandler handler = (HarvestRepository.ResponseHandler) iter.next();
192                String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
193                sendMessage( handler, message );
194            }
195    
196        }
197    
198        /**
199         * returns true if the passed source shall be harvested. this is true if a source has not been
200         * harvested before or the next harvesting timestamp has been reached and the source is of type
201         * 
202         * @see HarvestRepository.ResourceType service
203         * 
204         * @param source
205         * @return <code>true</code> if the passed source shall be harvested
206         * @throws DBPoolException
207         * @throws SQLException
208         */
209        protected boolean shallHarvest( URI source, ResourceType targetType )
210                                throws IOException, DBPoolException, SQLException {
211    
212            if ( inProgress.contains( source ) ) {
213                return false;
214            }
215    
216            HarvestRepository repository = HarvestRepository.getInstance();
217    
218            ResourceType st = repository.getSourceType( source );
219    
220            if ( !st.equals( targetType ) ) {
221                return false;
222            }
223    
224            Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
225            Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
226    
227            long tmp = System.currentTimeMillis() - nextHarvesting.getTime();
228            return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source );
229        }
230    
231        /**
232         * informs all response handlers assigend to a source about an exception that occurs when
233         * harvesting a source
234         * 
235         * @param source
236         * @param e
237         * @throws URISyntaxException
238         * @throws SQLException
239         * @throws DBPoolException
240         */
241        protected void informResponseHandlers( URI source, Throwable e )
242                                throws IOException, DBPoolException, SQLException, URISyntaxException {
243    
244            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
245    
246            for ( Iterator iter = list.iterator(); iter.hasNext(); ) {
247                HarvestRepository.ResponseHandler handler = (HarvestRepository.ResponseHandler) iter.next();
248                String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
249                                                     e.getMessage() );
250                sendMessage( handler, message );
251    
252            }
253    
254        }
255    
256        /**
257         * 
258         * @param handler
259         * @param message
260         * @throws SendMailException
261         * @throws MalformedURLException
262         * @throws IOException
263         * @throws HttpException
264         */
265        private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
266            if ( handler.isMailAddress() ) {
267                String s = handler.getUri().toASCIIString();
268                int p = s.indexOf( ":" );
269                s = s.substring( p + 1, s.length() );
270                LOG.logDebug( "sending message", message );
271                LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) );
272                MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
273                try {
274                    MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
275                } catch ( Exception e ) {
276                    LOG.logInfo( "could not send mail to admin:", e.getMessage() );
277                    LOG.logError( e.getMessage(), e );
278                }
279            } else {
280                LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
281                HttpClient client = new HttpClient();
282                LOG.logDebug( message );
283    
284                try {
285                    client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() );
286                    GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message );
287                    client.executeMethod( get );
288                } catch ( Exception e ) {
289                    LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
290                                 + e.getMessage() );
291                }
292    
293            }
294        }
295    
296        /**
297         * abstract super class for all harvest processores
298         * 
299         * 
300         * @version $Revision: 9345 $
301         * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
302         * @author last edited by: $Author: apoth $
303         * 
304         * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
305         * 
306         * @since 2.0
307         */
308        protected abstract class AbstractHarvestProcessor extends Thread {
309    
310            protected URI source = null;
311    
312            protected AbstractHarvester owner = null;
313    
314            protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
315                this.owner = owner;
316                this.source = source;
317            }
318    
319            /**
320             * performs a transaction for inserting or updating a service meta data record in the
321             * catalogue a harvester instance belongs too
322             * 
323             * @param trans
324             * @throws SAXException
325             * @throws IOException
326             * @throws XMLParsingException
327             * @throws OGCWebServiceException
328             */
329            protected void performTransaction( String trans )
330                                    throws SAXException, IOException, OGCWebServiceException {
331    
332                StringReader sr = new StringReader( trans );
333                XMLFragment xml = new XMLFragment();
334                xml.load( sr, XMLFragment.DEFAULT_URL );
335                Transaction transaction = Transaction.create( "id", xml.getRootElement() );
336                CSWFactory.getService().doService( transaction );
337    
338            }
339    
340            /**
341             * creates a CSW Transaction including an Update operation for the passed meta data.
342             * 
343             * @param identifier
344             * @param xpath
345             * @param metaData
346             * @return update request
347             * @throws IOException
348             */
349            protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
350                                    throws IOException {
351    
352                // read template from file
353                // TODO
354                // read different templates depending on metadata format
355                URL url = AbstractHarvester.class.getResource( "iso09_update_template.xml" );
356                String update = FileUtils.readTextFile( url ).toString();
357    
358                // remove XML doctype section to enable inserting the metadata set as string
359                // into the template
360                String s = metaData.getAsString();
361                int p = s.lastIndexOf( "?>" );
362                if ( p > -1 ) {
363                    s = s.substring( p + 2, s.length() );
364                }
365    
366                // set metadata set to update by replacing the string '$metadata$'
367                // in the template
368                update = StringTools.replace( update, "$metadata$", s, false );
369                s = createConstraint( identifier, xpath ).toString();
370    
371                // set filter/constraint to identify the metadata set to be updated
372                // by replacing the string '$constraints$' in the template
373                return StringTools.replace( update, "$constraints$", s, false );
374            }
375    
376            /**
377             * creates a transaction request including a delete operation to remove the metadata record
378             * with the passed fileIdentifier from the catalogue
379             * 
380             * @param identifier
381             * @param xpath
382             * @return delete request
383             * @throws IOException
384             */
385            protected String createDeleteRequest( String identifier, String xpath )
386                                    throws IOException {
387                // read template from file
388                // TODO
389                // read different templates depending on metadata format
390                URL url = AbstractHarvester.class.getResource( "iso09_delete_template.xml" );
391                String delete = FileUtils.readTextFile( url ).toString();
392    
393                String s = createConstraint( identifier, xpath ).toString();
394    
395                // set filter/constraint to identify the metadata set to be deleted
396                // by replacing the string '$constraints$' in the template
397                return StringTools.replace( delete, "$constraints$", s, false );
398            }
399    
400            /**
401             * a constraint for delete und update operation depends on concrete metadata format. An
402             * implementing class must consider this.
403             * 
404             * @param fileIdentifier
405             *            value to be compared
406             * @param xpath
407             *            comparable property
408             * @return
409             */
410            protected abstract String createConstraint( String fileIdentifier, String xpath )
411                                    throws IOException;
412    
413            /**
414             * creates a CSW Transaction including an Update operation for the passed meta data
415             * 
416             * @param metaData
417             * @return
418             * @throws IOException
419             */
420            protected String createInsertRequest( XMLFragment metaData )
421                                    throws IOException {
422                // read template from file
423                // TODO
424                // read different templates depending on metadata format
425                URL url = AbstractHarvester.class.getResource( "iso09_insert_template.xml" );
426                String insert = FileUtils.readTextFile( url ).toString();
427    
428                String s = metaData.getAsString();
429                int p = s.lastIndexOf( "?>" );
430                if ( p > -1 ) {
431                    s = s.substring( p + 2, s.length() );
432                }
433    
434                // set metadata set to insert by replacing the string '$metadata$'
435                // in the template
436                return StringTools.replace( insert, "$metadata$", s, false );
437    
438            }
439    
440            /**
441             * actualizes the source in the repository with timestamp of last harvesting
442             * 
443             * @param source
444             * @param date
445             * @throws SQLException
446             * @throws DBPoolException
447             */
448            protected void writeLastHarvestingTimestamp( URI source, Date date )
449                                    throws IOException, DBPoolException, SQLException {
450                HarvestRepository repository = HarvestRepository.getInstance();
451                repository.setLastHarvestingTimestamp( source, date );
452            }
453    
454            /**
455             * actualizes the source in the repository with timestamp when next harvesting shall be
456             * performed
457             * 
458             * @param source
459             * @param date
460             * @throws SQLException
461             * @throws DBPoolException
462             */
463            protected void writeNextHarvestingTimestamp( URI source, Date date )
464                                    throws IOException, DBPoolException, SQLException {
465                HarvestRepository repository = HarvestRepository.getInstance();
466                long ts = repository.getHarvestInterval( source );
467                date = new Date( ts + date.getTime() );
468                repository.setNextHarvestingTimestamp( source, date );
469            }
470    
471        }
472    
473    }