001 //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/ogcwebservices/csw/manager/ServiceHarvester.java $
002 /*---------------- FILE HEADER ------------------------------------------
003
004 This file is part of deegree.
005 Copyright (C) 2001-2008 by:
006 EXSE, Department of Geography, University of Bonn
007 http://www.giub.uni-bonn.de/deegree/
008 lat/lon GmbH
009 http://www.lat-lon.de
010
011 This library is free software; you can redistribute it and/or
012 modify it under the terms of the GNU Lesser General Public
013 License as published by the Free Software Foundation; either
014 version 2.1 of the License, or (at your option) any later version.
015
016 This library is distributed in the hope that it will be useful,
017 but WITHOUT ANY WARRANTY; without even the implied warranty of
018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 Lesser General Public License for more details.
020
021 You should have received a copy of the GNU Lesser General Public
022 License along with this library; if not, write to the Free Software
023 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024
025 Contact:
026
027 Andreas Poth
028 lat/lon GmbH
029 Aennchenstr. 19
030 53115 Bonn
031 Germany
032 E-Mail: poth@lat-lon.de
033
034 Prof. Dr. Klaus Greve
035 Department of Geography
036 University of Bonn
037 Meckenheimer Allee 166
038 53115 Bonn
039 Germany
040 E-Mail: greve@giub.uni-bonn.de
041
042
043 ---------------------------------------------------------------------------*/
044 package org.deegree.ogcwebservices.csw.manager;
045
046 import java.io.IOException;
047 import java.net.URI;
048 import java.net.URL;
049 import java.util.Date;
050 import java.util.Iterator;
051 import java.util.List;
052
053 import javax.xml.transform.TransformerException;
054
055 import org.deegree.framework.log.ILogger;
056 import org.deegree.framework.log.LoggerFactory;
057 import org.deegree.framework.xml.XMLFragment;
058 import org.deegree.framework.xml.XMLParsingException;
059 import org.deegree.framework.xml.XMLTools;
060 import org.deegree.framework.xml.XSLTDocument;
061 import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
062 import org.xml.sax.SAXException;
063
064 /**
065 * <p>
066 * Concrete implementation of
067 *
068 * @see org.deegree.ogcwebservices.csw.manager.AbstractHarvester for harvesting service metadata
069 * from OGC web services. To enable this capabilities documents of the OWS will be accessed and
070 * transformed into a valid format that will be understood by the underlying catalogue. To
071 * enable a lot of flexibility a XSLT read from resource bundle (harvestservice.xsl) script
072 * will be used to perform the required transformation.
073 * </p>
074 * <p>
075 * A valid harvest SOURCE for a service must be a complete GetCapabilities request; the
076 * RESOURCETYPE must be 'service'. Example:
077 * </p>
078 * <p>
079 * ...?request=Harvest&version=2.0.0&source=[http://MyServer:8080/deegree?
080 * service=WFS&version=1.1.0&request=GetCapabilities]&resourceType=service&
081 * resourceFormat=text/xml&responseHandler=mailto:info@lat-lon.de&harvestInterval=P2W
082 * </p>
083 * <p>
084 * value in brackets [..] must be URL encoded and send without brackets!
085 * </p>
086 * <p>
087 * This is not absolutly compliant to OGc CSW 2.0.0 specification but Harvest definition as
088 * available from the spec is to limited because it just targets single metadata documents.
089 * </p>
090 *
091 * @version $Revision: 9345 $
092 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
093 * @author last edited by: $Author: apoth $
094 *
095 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
096 *
097 * @since 2.0
098 */
099 public class ServiceHarvester extends AbstractHarvester {
100
101 private static final ILogger LOG = LoggerFactory.getLogger( ServiceHarvester.class );
102
103 private static final URL xslt = ServiceHarvester.class.getResource( "harvestservice.xsl" );
104
105 private static ServiceHarvester sh = null;
106
107 /**
108 * singelton
109 *
110 * @return
111 */
112 public static ServiceHarvester getInstance() {
113 if ( sh == null ) {
114 sh = new ServiceHarvester();
115 }
116 return sh;
117 }
118
119 @Override
120 public void run() {
121 LOG.logDebug( "starting harvest iteration for ServiceHarvester." );
122 try {
123 HarvestRepository repository = HarvestRepository.getInstance();
124
125 List<URI> sources = repository.getSources();
126 for ( Iterator iter = sources.iterator(); iter.hasNext(); ) {
127 URI source = (URI) iter.next();
128 try {
129 // determine if source shall be harvested
130 if ( shallHarvest( source, ResourceType.service ) ) {
131 inProgress.add( source );
132 HarvestProcessor processor = new HarvestProcessor( this, source );
133 processor.start();
134 }
135 } catch ( Exception e ) {
136 LOG.logError( "Exception harvesting service: " + source, e );
137 informResponseHandlers( source, e );
138 }
139 }
140 } catch ( Exception e ) {
141 LOG.logError( "generell Exception harvesting services", e );
142 }
143
144 }
145
146 /**
147 * inner class for processing asynchronous harvesting of a service
148 *
149 * @version $Revision: 9345 $
150 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
151 * @author last edited by: $Author: apoth $
152 *
153 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $
154 *
155 * @since 2.0
156 */
157 protected class HarvestProcessor extends AbstractHarvestProcessor {
158
159 HarvestProcessor( AbstractHarvester owner, URI source ) {
160 super( owner, source );
161 }
162
163 @Override
164 public void run() {
165 try {
166 HarvestRepository repository = HarvestRepository.getInstance();
167 XMLFragment capabilities = accessSourceCapabilities( source );
168 Date harvestingTimestamp = repository.getNextHarvestingTimestamp( source );
169 XMLFragment metaData = transformCapabilities( capabilities );
170 String trans = null;
171 if ( repository.getLastHarvestingTimestamp( source ) == null ) {
172 trans = createInsertRequest( metaData );
173 } else {
174 trans = createUpdateRequest( getID( metaData ),
175 "smXML:fileIdentifier/smXML:CharacterString",
176 metaData );
177 }
178 performTransaction( trans );
179 // update timestamps just if transaction has been performed
180 // successfully
181 writeLastHarvestingTimestamp( source, harvestingTimestamp );
182 writeNextHarvestingTimestamp( source, harvestingTimestamp );
183 informResponseHandlers( source );
184 } catch ( Exception e ) {
185 LOG.logError( "could not perform harvest operation for source: " + source, e );
186 try {
187 owner.informResponseHandlers( source, e );
188 } catch ( Exception ee ) {
189 ee.printStackTrace();
190 }
191 }
192 }
193
194 private String getID( XMLFragment metaData )
195 throws XMLParsingException {
196 String xpath = "smXML:fileIdentifier/smXML:CharacterString";
197 String fileIdentifier = XMLTools.getRequiredNodeAsString( metaData.getRootElement(),
198 xpath, nsc );
199 return fileIdentifier;
200 }
201
202 @Override
203 protected String createConstraint( String fileIdentifier, String xPath ) {
204 throw new UnsupportedOperationException();
205 }
206
207 /**
208 * transforms a OWS capabilities document into the desired target format
209 *
210 * @param xml
211 * @return
212 * @throws IOException
213 * @throws SAXException
214 * @throws TransformerException
215 */
216 private XMLFragment transformCapabilities( XMLFragment xml )
217 throws IOException, SAXException, TransformerException {
218
219 XSLTDocument xsltDoc = new XSLTDocument();
220 xsltDoc.load( xslt );
221
222 return xsltDoc.transform( xml );
223 }
224
225 /**
226 * returns the capabilities of
227 *
228 * @param source
229 * @return
230 * @throws IOException
231 * @throws SAXException
232 */
233 private XMLFragment accessSourceCapabilities( URI source )
234 throws IOException, SAXException {
235
236 URL url = source.toURL();
237 XMLFragment xml = new XMLFragment();
238 xml.load( url );
239 return xml;
240 }
241
242 }
243
244 }