001    //$HeadURL: https://svn.wald.intevation.org/svn/deegree/base/branches/2.3_testing/src/org/deegree/ogcwebservices/csw/manager/ServiceHarvester.java $
002    /*----------------------------------------------------------------------------
003     This file is part of deegree, http://deegree.org/
004     Copyright (C) 2001-2009 by:
005     Department of Geography, University of Bonn
006     and
007     lat/lon GmbH
008    
009     This library is free software; you can redistribute it and/or modify it under
010     the terms of the GNU Lesser General Public License as published by the Free
011     Software Foundation; either version 2.1 of the License, or (at your option)
012     any later version.
013     This library is distributed in the hope that it will be useful, but WITHOUT
014     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
015     FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
016     details.
017     You should have received a copy of the GNU Lesser General Public License
018     along with this library; if not, write to the Free Software Foundation, Inc.,
019     59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020    
021     Contact information:
022    
023     lat/lon GmbH
024     Aennchenstr. 19, 53177 Bonn
025     Germany
026     http://lat-lon.de/
027    
028     Department of Geography, University of Bonn
029     Prof. Dr. Klaus Greve
030     Postfach 1147, 53001 Bonn
031     Germany
032     http://www.geographie.uni-bonn.de/deegree/
033    
034     e-mail: info@deegree.org
035     ----------------------------------------------------------------------------*/
036    package org.deegree.ogcwebservices.csw.manager;
037    
038    import java.io.IOException;
039    import java.net.URI;
040    import java.net.URL;
041    import java.util.Date;
042    import java.util.Iterator;
043    import java.util.List;
044    
045    import javax.xml.transform.TransformerException;
046    
047    import org.deegree.framework.log.ILogger;
048    import org.deegree.framework.log.LoggerFactory;
049    import org.deegree.framework.xml.XMLFragment;
050    import org.deegree.framework.xml.XMLParsingException;
051    import org.deegree.framework.xml.XMLTools;
052    import org.deegree.framework.xml.XSLTDocument;
053    import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
054    import org.xml.sax.SAXException;
055    
056    /**
057     * <p>
058     * Concrete implementation of
059     * 
060     * @see org.deegree.ogcwebservices.csw.manager.AbstractHarvester for harvesting service metadata
061     *      from OGC web services. To enable this capabilities documents of the OWS will be accessed and
062     *      transformed into a valid format that will be understood by the underlying catalogue. To
063     *      enable a lot of flexibility a XSLT read from resource bundle (harvestservice.xsl) script
064     *      will be used to perform the required transformation.
065     *      </p>
066     *      <p>
067     *      A valid harvest SOURCE for a service must be a complete GetCapabilities request; the
068     *      RESOURCETYPE must be 'service'. Example:
069     *      </p>
070     *      <p>
071     *      ...?request=Harvest&version=2.0.0&source=[http://MyServer:8080/deegree?
072     *      service=WFS&version=1.1.0&request=GetCapabilities]&resourceType=service&
073     *      resourceFormat=text/xml&responseHandler=mailto:info@lat-lon.de&harvestInterval=P2W
074     *      </p>
075     *      <p>
076     *      value in brackets [..] must be URL encoded and send without brackets!
077     *      </p>
078     *      <p>
079     *      This is not absolutly compliant to OGc CSW 2.0.0 specification but Harvest definition as
080     *      available from the spec is to limited because it just targets single metadata documents.
081     *      </p>
082     * 
083     * @version $Revision: 19475 $
084     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
085     * @author last edited by: $Author: lbuesching $
086     * 
087     * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
088     * 
089     * @since 2.0
090     */
091    public class ServiceHarvester extends AbstractHarvester {
092    
093        static final ILogger LOG = LoggerFactory.getLogger( ServiceHarvester.class );
094    
095        static final URL xslt = ServiceHarvester.class.getResource( "harvestservice.xsl" );
096    
097        private static ServiceHarvester sh = null;
098    
099        /**
100         * @param version
101         *            the version of the CSW
102         */
103        public ServiceHarvester( String version ) {
104            super( version );
105        }
106    
107        /**
108         * singelton
109         * 
110         * @param version
111         *            the version of the CSW
112         * 
113         * @return the new instance
114         */
115        public static ServiceHarvester getInstance( String version ) {
116            if ( sh == null ) {
117                sh = new ServiceHarvester( version );
118            }
119            return sh;
120        }
121    
122        @Override
123        public void run() {
124            LOG.logDebug( "starting harvest iteration for ServiceHarvester." );
125            try {
126                HarvestRepository repository = HarvestRepository.getInstance();
127    
128                List<URI> sources = repository.getSources();
129                for ( Iterator<URI> iter = sources.iterator(); iter.hasNext(); ) {
130                    URI source = iter.next();
131                    try {
132                        // determine if source shall be harvested
133                        if ( shallHarvest( source, ResourceType.service ) ) {
134                            inProgress.add( source );
135                            HarvestProcessor processor = new HarvestProcessor( this, source );
136                            processor.start();
137                        }
138                    } catch ( Exception e ) {
139                        LOG.logError( "Exception harvesting service: " + source, e );
140                        informResponseHandlers( source, e );
141                    }
142                }
143            } catch ( Exception e ) {
144                LOG.logError( "generell Exception harvesting services", e );
145            }
146    
147        }
148    
149        /**
150         * inner class for processing asynchronous harvesting of a service
151         * 
152         * @version $Revision: 19475 $
153         * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
154         * @author last edited by: $Author: lbuesching $
155         * 
156         * @version 1.0. $Revision: 19475 $, $Date: 2009-09-02 14:51:48 +0200 (Mi, 02. Sep 2009) $
157         * 
158         * @since 2.0
159         */
160        protected class HarvestProcessor extends AbstractHarvestProcessor {
161    
162            HarvestProcessor( AbstractHarvester owner, URI source ) {
163                super( owner, source );
164            }
165    
166            @Override
167            public void run() {
168                try {
169                    HarvestRepository repository = HarvestRepository.getInstance();
170                    XMLFragment capabilities = accessSourceCapabilities( source );
171                    Date harvestingTimestamp = repository.getNextHarvestingTimestamp( source );
172                    XMLFragment metaData = transformCapabilities( capabilities );
173                    String trans = null;
174                    if ( repository.getLastHarvestingTimestamp( source ) == null ) {
175                        trans = createInsertRequest( metaData );
176                    } else {
177                        trans = createUpdateRequest( getID( metaData ), getIdentifierXPathForUpdate( metaData ), metaData );
178                    }
179                    performTransaction( trans );
180                    // update timestamps just if transaction has been performed
181                    // successfully
182                    writeLastHarvestingTimestamp( source, harvestingTimestamp );
183                    writeNextHarvestingTimestamp( source, harvestingTimestamp );
184                    informResponseHandlers( source );
185                } catch ( Exception e ) {
186                    LOG.logError( "could not perform harvest operation for source: " + source, e );
187                    try {
188                        owner.informResponseHandlers( source, e );
189                    } catch ( Exception ee ) {
190                        ee.printStackTrace();
191                    }
192                }
193            }
194    
195            private String getID( XMLFragment metaData )
196                                    throws XMLParsingException {
197                String xpath = getIdentifierXPath( metaData );
198                String fileIdentifier = XMLTools.getRequiredNodeAsString( metaData.getRootElement(), xpath, nsc );
199                return fileIdentifier;
200            }
201    
202            @Override
203            protected String createConstraint( String fileIdentifier, String xPath ) {
204                throw new UnsupportedOperationException();
205            }
206    
207            /**
208             * transforms a OWS capabilities document into the desired target format
209             * 
210             * @param xml
211             * @return the transformed document
212             * @throws IOException
213             * @throws SAXException
214             * @throws TransformerException
215             */
216            private XMLFragment transformCapabilities( XMLFragment xml )
217                                    throws IOException, SAXException, TransformerException {
218    
219                XSLTDocument xsltDoc = new XSLTDocument();
220                xsltDoc.load( xslt );
221    
222                return xsltDoc.transform( xml );
223            }
224    
225            /**
226             * returns the capabilities of
227             * 
228             * @param source
229             * @return the capabilities
230             * @throws IOException
231             * @throws SAXException
232             */
233            private XMLFragment accessSourceCapabilities( URI source )
234                                    throws IOException, SAXException {
235    
236                URL url = source.toURL();
237                XMLFragment xml = new XMLFragment();
238                xml.load( url );
239                return xml;
240            }
241    
242        }
243    
244    }