001    //$HeadURL: https://svn.wald.intevation.org/svn/deegree/base/branches/2.3_testing/src/org/deegree/ogcwebservices/csw/manager/AbstractHarvester.java $
002    /*----------------------------------------------------------------------------
003     This file is part of deegree, http://deegree.org/
004     Copyright (C) 2001-2009 by:
005     Department of Geography, University of Bonn
006     and
007     lat/lon GmbH
008    
009     This library is free software; you can redistribute it and/or modify it under
010     the terms of the GNU Lesser General Public License as published by the Free
011     Software Foundation; either version 2.1 of the License, or (at your option)
012     any later version.
013     This library is distributed in the hope that it will be useful, but WITHOUT
014     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
015     FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
016     details.
017     You should have received a copy of the GNU Lesser General Public License
018     along with this library; if not, write to the Free Software Foundation, Inc.,
019     59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020    
021     Contact information:
022    
023     lat/lon GmbH
024     Aennchenstr. 19, 53177 Bonn
025     Germany
026     http://lat-lon.de/
027    
028     Department of Geography, University of Bonn
029     Prof. Dr. Klaus Greve
030     Postfach 1147, 53001 Bonn
031     Germany
032     http://www.geographie.uni-bonn.de/deegree/
033    
034     e-mail: info@deegree.org
035     ----------------------------------------------------------------------------*/
036    package org.deegree.ogcwebservices.csw.manager;
037    
038    import java.io.IOException;
039    import java.io.StringReader;
040    import java.net.MalformedURLException;
041    import java.net.URI;
042    import java.net.URISyntaxException;
043    import java.net.URL;
044    import java.sql.SQLException;
045    import java.util.Date;
046    import java.util.Iterator;
047    import java.util.List;
048    import java.util.Timer;
049    import java.util.TimerTask;
050    import java.util.Vector;
051    
052    import org.apache.commons.httpclient.HttpClient;
053    import org.apache.commons.httpclient.methods.GetMethod;
054    import org.deegree.enterprise.WebUtils;
055    import org.deegree.framework.log.ILogger;
056    import org.deegree.framework.log.LoggerFactory;
057    import org.deegree.framework.mail.EMailMessage;
058    import org.deegree.framework.mail.MailHelper;
059    import org.deegree.framework.mail.MailMessage;
060    import org.deegree.framework.util.FileUtils;
061    import org.deegree.framework.util.StringTools;
062    import org.deegree.framework.xml.NamespaceContext;
063    import org.deegree.framework.xml.XMLFragment;
064    import org.deegree.io.DBPoolException;
065    import org.deegree.ogcbase.CommonNamespaces;
066    import org.deegree.ogcwebservices.OGCWebServiceException;
067    import org.deegree.ogcwebservices.csw.CSWFactory;
068    import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
069    import org.xml.sax.SAXException;
070    
071    /**
072     * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester
073     * shall be implemented. A concrete implementation of AbstractHarvester will be called within a
074     * timer loop.
075     * 
076     * 
077     * @version $Revision: 19475 $
078     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
079     * @author last edited by: $Author: lbuesching $
080     * 
081     * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
082     * 
083     * @since 2.0
084     */
085    public abstract class AbstractHarvester extends TimerTask {
086    
087        private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );
088    
089        private boolean stopped = true;
090    
091        private Timer timer = null;
092    
093        protected List<URI> inProgress = new Vector<URI>();
094    
095        protected String version;
096    
097        protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
098        static {
099            try {
100                nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
101                nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
102                nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
103            } catch ( URISyntaxException e ) {
104                e.printStackTrace();
105            }
106        }
107    
108        /**
109         * @param version
110         *            the version of the CSW
111         */
112        protected AbstractHarvester( String version ) {
113            this.version = version;
114        }
115    
116        /**
117         * adds a request to the harvesting process
118         * 
119         * @param request
120         * @throws IOException
121         * @throws SQLException
122         * @throws DBPoolException
123         */
124        public void addRequest( Harvest request )
125                                throws IOException, DBPoolException, SQLException {
126            HarvestRepository.getInstance().storeRequest( request );
127        }
128    
129        /**
130         * returns true if the harvesting process is running
131         * 
132         * @return <code>true</code> if the harvesting process is running
133         */
134        public boolean isRunning() {
135            return !stopped;
136        }
137    
138        /**
139         * removes a request from the harvesting request.
140         * <p>
141         * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic
142         * harvesting job, so this method can not be called with a standard OGC OWS request !!</b>
143         * </p>
144         * 
145         * @param request
146         * @throws IOException
147         * @throws SQLException
148         * @throws DBPoolException
149         */
150        public void removeRequest( Harvest request )
151                                throws IOException, DBPoolException, SQLException {
152            HarvestRepository.getInstance().dropRequest( request.getSource() );
153        }
154    
155        /**
156         * starts the harvesting process
157         * 
158         */
159        public void startHarvesting() {
160            timer = new Timer();
161            timer.schedule( this, 0, 10000 );
162            stopped = false;
163            LOG.logInfo( "harvesting has been started" );
164        }
165    
166        /**
167         * stops the harvesting process
168         * 
169         */
170        public void stopHarvesting() {
171            timer.purge();
172            timer.cancel();
173            stopped = true;
174            LOG.logInfo( "harvesting has been stopped" );
175        }
176    
177        /**
178         * informs all response handlers assigend to a source about successful harvesting of the source
179         * 
180         * @param source
181         * @throws URISyntaxException
182         * @throws SQLException
183         * @throws DBPoolException
184         * @throws MalformedURLException
185         */
186        protected void informResponseHandlers( URI source )
187                                throws IOException, DBPoolException, SQLException, URISyntaxException {
188    
189            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
190    
191            for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
192                HarvestRepository.ResponseHandler handler = iter.next();
193                String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
194                sendMessage( handler, message );
195            }
196    
197        }
198    
199        /**
200         * returns true if the passed source shall be harvested. this is true if a source has not been
201         * harvested before or the next harvesting timestamp has been reached and the source is of type
202         * 
203         * @see HarvestRepository.ResourceType service
204         * 
205         * @param source
206         * @return <code>true</code> if the passed source shall be harvested
207         * @throws DBPoolException
208         * @throws SQLException
209         */
210        protected boolean shallHarvest( URI source, ResourceType targetType )
211                                throws IOException, DBPoolException, SQLException {
212    
213            if ( inProgress.contains( source ) ) {
214                return false;
215            }
216    
217            HarvestRepository repository = HarvestRepository.getInstance();
218    
219            ResourceType st = repository.getSourceType( source );
220    
221            if ( !st.equals( targetType ) ) {
222                return false;
223            }
224    
225            Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
226            Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
227    
228            long tmp = System.currentTimeMillis() - nextHarvesting.getTime();
229            return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source );
230        }
231    
232        /**
233         * informs all response handlers assigend to a source about an exception that occurs when
234         * harvesting a source
235         * 
236         * @param source
237         * @param e
238         * @throws URISyntaxException
239         * @throws SQLException
240         * @throws DBPoolException
241         */
242        protected void informResponseHandlers( URI source, Throwable e )
243                                throws IOException, DBPoolException, SQLException, URISyntaxException {
244    
245            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
246    
247            for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
248                HarvestRepository.ResponseHandler handler = iter.next();
249                String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
250                                                     e.getMessage() );
251                sendMessage( handler, message );
252    
253            }
254    
255        }
256    
257        /**
258         * 
259         * @param handler
260         * @param message
261         */
262        private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
263            if ( handler.isMailAddress() ) {
264                String s = handler.getUri().toASCIIString();
265                int p = s.indexOf( ":" );
266                s = s.substring( p + 1, s.length() );
267                LOG.logDebug( "sending message", message );
268                LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) );
269                MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
270                try {
271                    MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
272                } catch ( Exception e ) {
273                    LOG.logInfo( "could not send mail to admin:", e.getMessage() );
274                    LOG.logError( e.getMessage(), e );
275                }
276            } else {
277                LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
278                HttpClient client = new HttpClient();
279                LOG.logDebug( message );
280    
281                try {
282                    client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() );
283                    GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message );
284                    client.executeMethod( get );
285                } catch ( Exception e ) {
286                    LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
287                                 + e.getMessage() );
288                }
289    
290            }
291        }
292    
293        /**
294         * abstract super class for all harvest processores
295         * 
296         * 
297         * @version $Revision: 19475 $
298         * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
299         * @author last edited by: $Author: lbuesching $
300         * 
301         * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
302         * 
303         * @since 2.0
304         */
305        protected abstract class AbstractHarvestProcessor extends Thread {
306    
307            protected URI source = null;
308    
309            protected AbstractHarvester owner = null;
310    
311            protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
312                this.owner = owner;
313                this.source = source;
314            }
315    
316            /**
317             * performs a transaction for inserting or updating a service meta data record in the
318             * catalogue a harvester instance belongs too
319             * 
320             * @param trans
321             * @throws SAXException
322             * @throws IOException
323             * @throws OGCWebServiceException
324             */
325            protected void performTransaction( String trans )
326                                    throws SAXException, IOException, OGCWebServiceException {
327    
328                StringReader sr = new StringReader( trans );
329                XMLFragment xml = new XMLFragment();
330                xml.load( sr, XMLFragment.DEFAULT_URL );
331                Transaction transaction = Transaction.create( "id", xml.getRootElement() );
332                CSWFactory.getService().doService( transaction );
333    
334            }
335    
336            /**
337             * creates a CSW Transaction including an Update operation for the passed meta data.
338             * 
339             * @param identifier
340             * @param xpath
341             * @param metaData
342             * @return update request
343             * @throws IOException
344             */
345            protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
346                                    throws IOException {
347                // read template from file
348                URL url = Templates.getTemplate( "Update_" + version );
349                String update = FileUtils.readTextFile( url ).toString();
350    
351                // remove XML doctype section to enable inserting the metadata set as string
352                // into the template
353                // TODO transform metaData into xml valid against the correct ISO AP version
354                String s = metaData.getAsString();
355                int p = s.lastIndexOf( "?>" );
356                if ( p > -1 ) {
357                    s = s.substring( p + 2, s.length() );
358                }
359    
360                // set metadata set to update by replacing the string '$metadata$'
361                // in the template
362                update = StringTools.replace( update, "$metadata$", s, false );
363                s = createConstraint( identifier, xpath ).toString();
364    
365                // set filter/constraint to identify the metadata set to be updated
366                // by replacing the string '$constraints$' in the template
367                return StringTools.replace( update, "$constraints$", s, false );
368            }
369    
370            /**
371             * creates a transaction request including a delete operation to remove the metadata record
372             * with the passed fileIdentifier from the catalogue
373             * 
374             * @param identifier
375             * @param xpath
376             * @return delete request
377             * @throws IOException
378             */
379            protected String createDeleteRequest( String identifier )
380                                    throws IOException {
381                // read template from file
382                URL url = Templates.getTemplate( "Delete_" + version );
383                String delete = FileUtils.readTextFile( url ).toString();
384    
385                String xpath = Messages.getString( "IdentifierDelete_" + version );
386                String s = createConstraint( identifier, xpath ).toString();
387    
388                // set filter/constraint to identify the metadata set to be deleted
389                // by replacing the string '$constraints$' in the template
390                return StringTools.replace( delete, "$constraints$", s, false );
391            }
392    
393            /**
394             * a constraint for delete und update operation depends on concrete metadata format. An
395             * implementing class must consider this.
396             * 
397             * @param fileIdentifier
398             *            value to be compared
399             * @param xpath
400             *            comparable property
401             * @return the constraint
402             */
403            protected abstract String createConstraint( String fileIdentifier, String xpath )
404                                    throws IOException;
405    
406            /**
407             * creates a CSW Transaction including an Update operation for the passed meta data
408             * 
409             * @param metaData
410             * @return the request
411             * @throws IOException
412             */
413            protected String createInsertRequest( XMLFragment metaData )
414                                    throws IOException {
415                // read template from file
416                URL url = Templates.getTemplate( "Insert_" + version );
417                String insert = FileUtils.readTextFile( url ).toString();
418    
419                // TODO transform metaData into xml valid against the correct ISO AP version
420                String s = metaData.getAsString();
421                int p = s.lastIndexOf( "?>" );
422                if ( p > -1 ) {
423                    s = s.substring( p + 2, s.length() );
424                }
425    
426                // set metadata set to insert by replacing the string '$metadata$'
427                // in the template
428                return StringTools.replace( insert, "$metadata$", s, false );
429    
430            }
431    
432            /**
433             * actualizes the source in the repository with timestamp of last harvesting
434             * 
435             * @param source
436             * @param date
437             * @throws SQLException
438             * @throws DBPoolException
439             */
440            protected void writeLastHarvestingTimestamp( URI source, Date date )
441                                    throws IOException, DBPoolException, SQLException {
442                HarvestRepository repository = HarvestRepository.getInstance();
443                repository.setLastHarvestingTimestamp( source, date );
444            }
445    
446            /**
447             * actualizes the source in the repository with timestamp when next harvesting shall be
448             * performed
449             * 
450             * @param source
451             * @param date
452             * @throws SQLException
453             * @throws DBPoolException
454             */
455            protected void writeNextHarvestingTimestamp( URI source, Date date )
456                                    throws IOException, DBPoolException, SQLException {
457                HarvestRepository repository = HarvestRepository.getInstance();
458                long ts = repository.getHarvestInterval( source );
459                date = new Date( ts + date.getTime() );
460                repository.setNextHarvestingTimestamp( source, date );
461            }
462    
463        }
464    
465        /**
466         * returns the XPath the metadata records identifier
467         * 
468         * @param metaData
469         * @return the XPath the metadata records identifier
470         */
471        protected String getIdentifierXPath( XMLFragment metaData ) {
472            // default is iso 19115
473            String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
474            if ( metaData != null ) {
475                String nspace = metaData.getRootElement().getNamespaceURI();
476                nspace = StringTools.replace( nspace, "http://", "", true );
477                xpath = Messages.getString( "Identifier_" + nspace );
478            }
479            return xpath;
480        }
481    
482        /**
483         * returns the XPath the metadata records identifier
484         * 
485         * @param metaData
486         * @return the XPath the metadata records identifier
487         */
488        protected String getIdentifierXPathForUpdate( XMLFragment metaData ) {
489            String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
490            if ( metaData != null ) {
491                String nspace = metaData.getRootElement().getNamespaceURI();
492                nspace = StringTools.replace( nspace, "http://", "", true );
493                xpath = Messages.getString( "IdentifierUpdate_" + nspace );
494            }
495            return xpath;
496        }
497    
498    }