001    //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/ogcwebservices/csw/manager/ServiceHarvester.java $
002    /*----------------    FILE HEADER  ------------------------------------------
003    
004     This file is part of deegree.
005     Copyright (C) 2001-2008 by:
006     EXSE, Department of Geography, University of Bonn
007     http://www.giub.uni-bonn.de/deegree/
008     lat/lon GmbH
009     http://www.lat-lon.de
010    
011     This library is free software; you can redistribute it and/or
012     modify it under the terms of the GNU Lesser General Public
013     License as published by the Free Software Foundation; either
014     version 2.1 of the License, or (at your option) any later version.
015    
016     This library is distributed in the hope that it will be useful,
017     but WITHOUT ANY WARRANTY; without even the implied warranty of
018     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
019     Lesser General Public License for more details.
020    
021     You should have received a copy of the GNU Lesser General Public
022     License along with this library; if not, write to the Free Software
023     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
024    
025     Contact:
026    
027     Andreas Poth
028     lat/lon GmbH
029     Aennchenstr. 19
030     53115 Bonn
031     Germany
032     E-Mail: poth@lat-lon.de
033    
034     Prof. Dr. Klaus Greve
035     Department of Geography
036     University of Bonn
037     Meckenheimer Allee 166
038     53115 Bonn
039     Germany
040     E-Mail: greve@giub.uni-bonn.de
041    
042     
043     ---------------------------------------------------------------------------*/
044    package org.deegree.ogcwebservices.csw.manager;
045    
046    import java.io.IOException;
047    import java.net.URI;
048    import java.net.URL;
049    import java.util.Date;
050    import java.util.Iterator;
051    import java.util.List;
052    
053    import javax.xml.transform.TransformerException;
054    
055    import org.deegree.framework.log.ILogger;
056    import org.deegree.framework.log.LoggerFactory;
057    import org.deegree.framework.xml.XMLFragment;
058    import org.deegree.framework.xml.XMLParsingException;
059    import org.deegree.framework.xml.XMLTools;
060    import org.deegree.framework.xml.XSLTDocument;
061    import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
062    import org.xml.sax.SAXException;
063    
064    /**
065     * <p>
066     * Concrete implementation of
067     * 
068     * @see org.deegree.ogcwebservices.csw.manager.AbstractHarvester for harvesting service metadata
069     *      from OGC web services. To enable this capabilities documents of the OWS will be accessed and
070     *      transformed into a valid format that will be understood by the underlying catalogue. To
071     *      enable a lot of flexibility a XSLT read from resource bundle (harvestservice.xsl) script
072     *      will be used to perform the required transformation.
073     *      </p>
074     *      <p>
075     *      A valid harvest SOURCE for a service must be a complete GetCapabilities request; the
076     *      RESOURCETYPE must be 'service'. Example:
077     *      </p>
078     *      <p>
079     *      ...?request=Harvest&version=2.0.0&source=[http://MyServer:8080/deegree?
080     *      service=WFS&version=1.1.0&request=GetCapabilities]&resourceType=service&
081     *      resourceFormat=text/xml&responseHandler=mailto:info@lat-lon.de&harvestInterval=P2W
082     *      </p>
083     *      <p>
084     *      value in brackets [..] must be URL encoded and send without brackets!
085     *      </p>
086     *      <p>
087     *      This is not absolutly compliant to OGc CSW 2.0.0 specification but Harvest definition as
088     *      available from the spec is to limited because it just targets single metadata documents.
089     *      </p>
090     * 
091     * @version $Revision: 9345 $
092     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
093     * @author last edited by: $Author: apoth $
094     * 
095     * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
096     * 
097     * @since 2.0
098     */
099    public class ServiceHarvester extends AbstractHarvester {
100    
101        private static final ILogger LOG = LoggerFactory.getLogger( ServiceHarvester.class );
102    
103        private static final URL xslt = ServiceHarvester.class.getResource( "harvestservice.xsl" );
104    
105        private static ServiceHarvester sh = null;
106    
107        /**
108         * singelton
109         * 
110         * @return
111         */
112        public static ServiceHarvester getInstance() {
113            if ( sh == null ) {
114                sh = new ServiceHarvester();
115            }
116            return sh;
117        }
118    
119        @Override
120        public void run() {
121            LOG.logDebug( "starting harvest iteration for ServiceHarvester." );
122            try {
123                HarvestRepository repository = HarvestRepository.getInstance();
124    
125                List<URI> sources = repository.getSources();
126                for ( Iterator iter = sources.iterator(); iter.hasNext(); ) {
127                    URI source = (URI) iter.next();
128                    try {
129                        // determine if source shall be harvested
130                        if ( shallHarvest( source, ResourceType.service ) ) {
131                            inProgress.add( source );
132                            HarvestProcessor processor = new HarvestProcessor( this, source );
133                            processor.start();
134                        }
135                    } catch ( Exception e ) {
136                        LOG.logError( "Exception harvesting service: " + source, e );
137                        informResponseHandlers( source, e );
138                    }
139                }
140            } catch ( Exception e ) {
141                LOG.logError( "generell Exception harvesting services", e );
142            }
143    
144        }
145    
146        /**
147         * inner class for processing asynchronous harvesting of a service
148         * 
149         * @version $Revision: 9345 $
150         * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
151         * @author last edited by: $Author: apoth $
152         * 
153         * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
154         * 
155         * @since 2.0
156         */
157        protected class HarvestProcessor extends AbstractHarvestProcessor {
158    
159            HarvestProcessor( AbstractHarvester owner, URI source ) {
160                super( owner, source );
161            }
162    
163            @Override
164            public void run() {
165                try {
166                    HarvestRepository repository = HarvestRepository.getInstance();
167                    XMLFragment capabilities = accessSourceCapabilities( source );
168                    Date harvestingTimestamp = repository.getNextHarvestingTimestamp( source );
169                    XMLFragment metaData = transformCapabilities( capabilities );
170                    String trans = null;
171                    if ( repository.getLastHarvestingTimestamp( source ) == null ) {
172                        trans = createInsertRequest( metaData );
173                    } else {
174                        trans = createUpdateRequest( getID( metaData ),
175                                                     "smXML:fileIdentifier/smXML:CharacterString",
176                                                     metaData );
177                    }
178                    performTransaction( trans );
179                    // update timestamps just if transaction has been performed
180                    // successfully
181                    writeLastHarvestingTimestamp( source, harvestingTimestamp );
182                    writeNextHarvestingTimestamp( source, harvestingTimestamp );
183                    informResponseHandlers( source );
184                } catch ( Exception e ) {
185                    LOG.logError( "could not perform harvest operation for source: " + source, e );
186                    try {
187                        owner.informResponseHandlers( source, e );
188                    } catch ( Exception ee ) {
189                        ee.printStackTrace();
190                    }
191                }
192            }
193    
194            private String getID( XMLFragment metaData )
195                                    throws XMLParsingException {
196                String xpath = "smXML:fileIdentifier/smXML:CharacterString";
197                String fileIdentifier = XMLTools.getRequiredNodeAsString( metaData.getRootElement(),
198                                                                          xpath, nsc );
199                return fileIdentifier;
200            }
201    
202            @Override
203            protected String createConstraint( String fileIdentifier, String xPath ) {
204                throw new UnsupportedOperationException();
205            }
206    
207            /**
208             * transforms a OWS capabilities document into the desired target format
209             * 
210             * @param xml
211             * @return
212             * @throws IOException
213             * @throws SAXException
214             * @throws TransformerException
215             */
216            private XMLFragment transformCapabilities( XMLFragment xml )
217                                    throws IOException, SAXException, TransformerException {
218    
219                XSLTDocument xsltDoc = new XSLTDocument();
220                xsltDoc.load( xslt );
221    
222                return xsltDoc.transform( xml );
223            }
224    
225            /**
226             * returns the capabilities of
227             * 
228             * @param source
229             * @return
230             * @throws IOException
231             * @throws SAXException
232             */
233            private XMLFragment accessSourceCapabilities( URI source )
234                                    throws IOException, SAXException {
235    
236                URL url = source.toURL();
237                XMLFragment xml = new XMLFragment();
238                xml.load( url );
239                return xml;
240            }
241    
242        }
243    
244    }