036    package org.deegree.ogcwebservices.csw.manager;
038    import java.io.IOException;
039    import java.io.StringReader;
040    import java.net.MalformedURLException;
041    import java.net.URI;
042    import java.net.URISyntaxException;
043    import java.net.URL;
044    import java.sql.SQLException;
045    import java.util.Date;
046    import java.util.Iterator;
047    import java.util.List;
048    import java.util.Timer;
049    import java.util.TimerTask;
050    import java.util.Vector;
052    import org.apache.commons.httpclient.HttpClient;
053    import org.apache.commons.httpclient.methods.GetMethod;
054    import org.deegree.enterprise.WebUtils;
055    import org.deegree.framework.log.ILogger;
056    import org.deegree.framework.log.LoggerFactory;
057    import org.deegree.framework.mail.EMailMessage;
058    import org.deegree.framework.mail.MailHelper;
059    import org.deegree.framework.mail.MailMessage;
060    import org.deegree.framework.util.FileUtils;
061    import org.deegree.framework.util.StringTools;
062    import org.deegree.framework.xml.NamespaceContext;
063    import org.deegree.framework.xml.XMLFragment;
064    import org.deegree.io.DBPoolException;
065    import org.deegree.ogcbase.CommonNamespaces;
066    import org.deegree.ogcwebservices.OGCWebServiceException;
067    import org.deegree.ogcwebservices.csw.CSWFactory;
068    import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
069    import org.xml.sax.SAXException;
071    /**
072     * Abstract super class of all CS-W harvesters. For each kind of source a specialized harvester
073     * shall be implemented. A concrete implementation of AbstractHarvester will be called within a
074     * timer loop.
075     * 
076     * 
077     * @version $Revision: 19475 $
078     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
079     * @author last edited by: $Author: lbuesching $
080     * 
081     * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
082     * 
083     * @since 2.0
084     */
085    public abstract class AbstractHarvester extends TimerTask {
087        private static final ILogger LOG = LoggerFactory.getLogger( AbstractHarvester.class );
089        private boolean stopped = true;
091        private Timer timer = null;
093        protected List<URI> inProgress = new Vector<URI>();
095        protected String version;
097        protected static NamespaceContext nsc = CommonNamespaces.getNamespaceContext();
098        static {
099            try {
100                nsc.addNamespace( "smXML", new URI( "http://metadata.dgiwg.org/smXML" ) );
101                nsc.addNamespace( "iso19119", new URI( "http://schemas.opengis.net/iso19119" ) );
102                nsc.addNamespace( "iso19115", new URI( "http://schemas.opengis.net/iso19115full" ) );
103            } catch ( URISyntaxException e ) {
104                e.printStackTrace();
105            }
106        }
108        /**
109         * @param version
110         *            the version of the CSW
111         */
112        protected AbstractHarvester( String version ) {
113            this.version = version;
114        }
116        /**
117         * adds a request to the harvesting process
118         * 
119         * @param request
120         * @throws IOException
121         * @throws SQLException
122         * @throws DBPoolException
123         */
124        public void addRequest( Harvest request )
125                                throws IOException, DBPoolException, SQLException {
126            HarvestRepository.getInstance().storeRequest( request );
127        }
129        /**
130         * returns true if the harvesting process is running
131         * 
132         * @return <code>true</code> if the harvesting process is running
133         */
134        public boolean isRunning() {
135            return !stopped;
136        }
138        /**
139         * removes a request from the harvesting request.
140         * <p>
141         * <b> !! At the moment the OGC CSW does not know a mechanism/request to stop a cyclic
142         * harvesting job, so this method can not be called with a standard OGC OWS request !!</b>
143         * </p>
144         * 
145         * @param request
146         * @throws IOException
147         * @throws SQLException
148         * @throws DBPoolException
149         */
150        public void removeRequest( Harvest request )
151                                throws IOException, DBPoolException, SQLException {
152            HarvestRepository.getInstance().dropRequest( request.getSource() );
153        }
155        /**
156         * starts the harvesting process
157         * 
158         */
159        public void startHarvesting() {
160            timer = new Timer();
161            timer.schedule( this, 0, 10000 );
162            stopped = false;
163            LOG.logInfo( "harvesting has been started" );
164        }
166        /**
167         * stops the harvesting process
168         * 
169         */
170        public void stopHarvesting() {
171            timer.purge();
172            timer.cancel();
173            stopped = true;
174            LOG.logInfo( "harvesting has been stopped" );
175        }
177        /**
178         * informs all response handlers assigend to a source about successful harvesting of the source
179         * 
180         * @param source
181         * @throws URISyntaxException
182         * @throws SQLException
183         * @throws DBPoolException
184         * @throws MalformedURLException
185         */
186        protected void informResponseHandlers( URI source )
187                                throws IOException, DBPoolException, SQLException, URISyntaxException {
189            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
191            for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
192                HarvestRepository.ResponseHandler handler = iter.next();
193                String message = StringTools.concat( 100, "source: ", source, " has been harvested successfully!" );
194                sendMessage( handler, message );
195            }
197        }
199        /**
200         * returns true if the passed source shall be harvested. this is true if a source has not been
201         * harvested before or the next harvesting timestamp has been reached and the source is of type
202         * 
203         * @see HarvestRepository.ResourceType service
204         * 
205         * @param source
206         * @return <code>true</code> if the passed source shall be harvested
207         * @throws DBPoolException
208         * @throws SQLException
209         */
210        protected boolean shallHarvest( URI source, ResourceType targetType )
211                                throws IOException, DBPoolException, SQLException {
213            if ( inProgress.contains( source ) ) {
214                return false;
215            }
217            HarvestRepository repository = HarvestRepository.getInstance();
219            ResourceType st = repository.getSourceType( source );
221            if ( !st.equals( targetType ) ) {
222                return false;
223            }
225            Date lastHarvesting = repository.getLastHarvestingTimestamp( source );
226            Date nextHarvesting = repository.getNextHarvestingTimestamp( source );
228            long tmp = System.currentTimeMillis() - nextHarvesting.getTime();
229            return lastHarvesting == null || tmp >= 0 || repository.shallForceHarvesting( source );
230        }
232        /**
233         * informs all response handlers assigend to a source about an exception that occurs when
234         * harvesting a source
235         * 
236         * @param source
237         * @param e
238         * @throws URISyntaxException
239         * @throws SQLException
240         * @throws DBPoolException
241         */
242        protected void informResponseHandlers( URI source, Throwable e )
243                                throws IOException, DBPoolException, SQLException, URISyntaxException {
245            List<HarvestRepository.ResponseHandler> list = HarvestRepository.getInstance().getResponseHandlers( source );
247            for ( Iterator<HarvestRepository.ResponseHandler> iter = list.iterator(); iter.hasNext(); ) {
248                HarvestRepository.ResponseHandler handler = iter.next();
249                String message = StringTools.concat( 500, "exception occures harvesting source: ", source, "; exception: ",
250                                                     e.getMessage() );
251                sendMessage( handler, message );
253            }
255        }
257        /**
258         * 
259         * @param handler
260         * @param message
261         */
262        private void sendMessage( HarvestRepository.ResponseHandler handler, String message ) {
263            if ( handler.isMailAddress() ) {
264                String s = handler.getUri().toASCIIString();
265                int p = s.indexOf( ":" );
266                s = s.substring( p + 1, s.length() );
267                LOG.logDebug( "sending message", message );
268                LOG.logDebug( StringTools.concat( 200, "informing response handler ", s, "via mail" ) );
269                MailMessage mm = new EMailMessage( "info@lat-lon.de", s, "CS-W harvesting", message );
270                try {
271                    MailHelper.createAndSendMail( mm, System.getProperty( "mailHost" ) );
272                } catch ( Exception e ) {
273                    LOG.logInfo( "could not send mail to admin:", e.getMessage() );
274                    LOG.logError( e.getMessage(), e );
275                }
276            } else {
277                LOG.logDebug( StringTools.concat( 200, "informing response handler ", handler.getUri(), "via HTTP GET" ) );
278                HttpClient client = new HttpClient();
279                LOG.logDebug( message );
281                try {
282                    client = WebUtils.enableProxyUsage( client, handler.getUri().toURL() );
283                    GetMethod get = new GetMethod( handler.getUri().toURL().toExternalForm() + "?message=" + message );
284                    client.executeMethod( get );
285                } catch ( Exception e ) {
286                    LOG.logInfo( "could not post message: '" + message + "' to: " + handler.getUri() + "; reason: "
287                                 + e.getMessage() );
288                }
290            }
291        }
293        /**
     * abstract super class for all harvest processors
295         * 
296         * 
297         * @version $Revision: 19475 $
298         * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
299         * @author last edited by: $Author: lbuesching $
300         * 
301         * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
302         * 
303         * @since 2.0
304         */
305        protected abstract class AbstractHarvestProcessor extends Thread {
307            protected URI source = null;
309            protected AbstractHarvester owner = null;
311            protected AbstractHarvestProcessor( AbstractHarvester owner, URI source ) {
312                this.owner = owner;
313                this.source = source;
314            }
316            /**
317             * performs a transaction for inserting or updating a service meta data record in the
318             * catalogue a harvester instance belongs too
319             * 
320             * @param trans
321             * @throws SAXException
322             * @throws IOException
323             * @throws OGCWebServiceException
324             */
325            protected void performTransaction( String trans )
326                                    throws SAXException, IOException, OGCWebServiceException {
328                StringReader sr = new StringReader( trans );
329                XMLFragment xml = new XMLFragment();
330                xml.load( sr, XMLFragment.DEFAULT_URL );
331                Transaction transaction = Transaction.create( "id", xml.getRootElement() );
332                CSWFactory.getService().doService( transaction );
334            }
336            /**
337             * creates a CSW Transaction including an Update operation for the passed meta data.
338             * 
339             * @param identifier
340             * @param xpath
341             * @param metaData
342             * @return update request
343             * @throws IOException
344             */
345            protected String createUpdateRequest( String identifier, String xpath, XMLFragment metaData )
346                                    throws IOException {
347                // read template from file
348                URL url = Templates.getTemplate( "Update_" + version );
349                String update = FileUtils.readTextFile( url ).toString();
351                // remove XML doctype section to enable inserting the metadata set as string
352                // into the template
353                // TODO transform metaData into xml valid against the correct ISO AP version
354                String s = metaData.getAsString();
355                int p = s.lastIndexOf( "?>" );
356                if ( p > -1 ) {
357                    s = s.substring( p + 2, s.length() );
358                }
360                // set metadata set to update by replacing the string '$metadata$'
361                // in the template
362                update = StringTools.replace( update, "$metadata$", s, false );
363                s = createConstraint( identifier, xpath ).toString();
365                // set filter/constraint to identify the metadata set to be updated
366                // by replacing the string '$constraints$' in the template
367                return StringTools.replace( update, "$constraints$", s, false );
368            }
370            /**
371             * creates a transaction request including a delete operation to remove the metadata record
372             * with the passed fileIdentifier from the catalogue
373             * 
374             * @param identifier
375             * @param xpath
376             * @return delete request
377             * @throws IOException
378             */
379            protected String createDeleteRequest( String identifier )
380                                    throws IOException {
381                // read template from file
382                URL url = Templates.getTemplate( "Delete_" + version );
383                String delete = FileUtils.readTextFile( url ).toString();
385                String xpath = Messages.getString( "IdentifierDelete_" + version );
386                String s = createConstraint( identifier, xpath ).toString();
388                // set filter/constraint to identify the metadata set to be deleted
389                // by replacing the string '$constraints$' in the template
390                return StringTools.replace( delete, "$constraints$", s, false );
391            }
393            /**
394             * a constraint for delete und update operation depends on concrete metadata format. An
395             * implementing class must consider this.
396             * 
397             * @param fileIdentifier
398             *            value to be compared
399             * @param xpath
400             *            comparable property
401             * @return the constraint
402             */
403            protected abstract String createConstraint( String fileIdentifier, String xpath )
404                                    throws IOException;
406            /**
407             * creates a CSW Transaction including an Update operation for the passed meta data
408             * 
409             * @param metaData
410             * @return the request
411             * @throws IOException
412             */
413            protected String createInsertRequest( XMLFragment metaData )
414                                    throws IOException {
415                // read template from file
416                URL url = Templates.getTemplate( "Insert_" + version );
417                String insert = FileUtils.readTextFile( url ).toString();
419                // TODO transform metaData into xml valid against the correct ISO AP version
420                String s = metaData.getAsString();
421                int p = s.lastIndexOf( "?>" );
422                if ( p > -1 ) {
423                    s = s.substring( p + 2, s.length() );
424                }
426                // set metadata set to insert by replacing the string '$metadata$'
427                // in the template
428                return StringTools.replace( insert, "$metadata$", s, false );
430            }
432            /**
433             * actualizes the source in the repository with timestamp of last harvesting
434             * 
435             * @param source
436             * @param date
437             * @throws SQLException
438             * @throws DBPoolException
439             */
440            protected void writeLastHarvestingTimestamp( URI source, Date date )
441                                    throws IOException, DBPoolException, SQLException {
442                HarvestRepository repository = HarvestRepository.getInstance();
443                repository.setLastHarvestingTimestamp( source, date );
444            }
446            /**
447             * actualizes the source in the repository with timestamp when next harvesting shall be
448             * performed
449             * 
450             * @param source
451             * @param date
452             * @throws SQLException
453             * @throws DBPoolException
454             */
455            protected void writeNextHarvestingTimestamp( URI source, Date date )
456                                    throws IOException, DBPoolException, SQLException {
457                HarvestRepository repository = HarvestRepository.getInstance();
458                long ts = repository.getHarvestInterval( source );
459                date = new Date( ts + date.getTime() );
460                repository.setNextHarvestingTimestamp( source, date );
461            }
463        }
465        /**
466         * returns the XPath the metadata records identifier
467         * 
468         * @param metaData
469         * @return the XPath the metadata records identifier
470         */
471        protected String getIdentifierXPath( XMLFragment metaData ) {
472            // default is iso 19115
473            String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
474            if ( metaData != null ) {
475                String nspace = metaData.getRootElement().getNamespaceURI();
476                nspace = StringTools.replace( nspace, "http://", "", true );
477                xpath = Messages.getString( "Identifier_" + nspace );
478            }
479            return xpath;
480        }
482        /**
483         * returns the XPath the metadata records identifier
484         * 
485         * @param metaData
486         * @return the XPath the metadata records identifier
487         */
488        protected String getIdentifierXPathForUpdate( XMLFragment metaData ) {
489            String xpath = "iso19115:fileIdentifier/smXML:CharacterString";
490            if ( metaData != null ) {
491                String nspace = metaData.getRootElement().getNamespaceURI();
492                nspace = StringTools.replace( nspace, "http://", "", true );
493                xpath = Messages.getString( "IdentifierUpdate_" + nspace );
494            }
495            return xpath;
496        }
498    }