001 //$HeadURL: svn+ssh://rbezema@svn.wald.intevation.org/deegree/base/branches/2.2_testing/src/org/deegree/ogcwebservices/csw/manager/ServiceHarvester.java $ 002 /*---------------- FILE HEADER ------------------------------------------ 003 004 This file is part of deegree. 005 Copyright (C) 2001-2008 by: 006 EXSE, Department of Geography, University of Bonn 007 http://www.giub.uni-bonn.de/deegree/ 008 lat/lon GmbH 009 http://www.lat-lon.de 010 011 This library is free software; you can redistribute it and/or 012 modify it under the terms of the GNU Lesser General Public 013 License as published by the Free Software Foundation; either 014 version 2.1 of the License, or (at your option) any later version. 015 016 This library is distributed in the hope that it will be useful, 017 but WITHOUT ANY WARRANTY; without even the implied warranty of 018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 Lesser General Public License for more details. 020 021 You should have received a copy of the GNU Lesser General Public 022 License along with this library; if not, write to the Free Software 023 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 024 025 Contact: 026 027 Andreas Poth 028 lat/lon GmbH 029 Aennchenstr. 19 030 53115 Bonn 031 Germany 032 E-Mail: poth@lat-lon.de 033 034 Prof. Dr. Klaus Greve 035 Department of Geography 036 University of Bonn 037 Meckenheimer Allee 166 038 53115 Bonn 039 Germany 040 E-Mail: greve@giub.uni-bonn.de 041 042 043 ---------------------------------------------------------------------------*/ 044 package org.deegree.ogcwebservices.csw.manager; 045 046 import java.io.IOException; 047 import java.net.URI; 048 import java.net.URL; 049 import java.util.Date; 050 import java.util.Iterator; 051 import java.util.List; 052 053 import javax.xml.transform.TransformerException; 054 055 import org.deegree.framework.log.ILogger; 056 import org.deegree.framework.log.LoggerFactory; 057 import org.deegree.framework.xml.XMLFragment; 058 import org.deegree.framework.xml.XMLParsingException; 059 import org.deegree.framework.xml.XMLTools; 060 import org.deegree.framework.xml.XSLTDocument; 061 import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType; 062 import org.xml.sax.SAXException; 063 064 /** 065 * <p> 066 * Concrete implementation of 067 * 068 * @see org.deegree.ogcwebservices.csw.manager.AbstractHarvester for harvesting service metadata 069 * from OGC web services. To enable this capabilities documents of the OWS will be accessed and 070 * transformed into a valid format that will be understood by the underlying catalogue. To 071 * enable a lot of flexibility a XSLT read from resource bundle (harvestservice.xsl) script 072 * will be used to perform the required transformation. 073 * </p> 074 * <p> 075 * A valid harvest SOURCE for a service must be a complete GetCapabilities request; the 076 * RESOURCETYPE must be 'service'. Example: 077 * </p> 078 * <p> 079 * ...?request=Harvest&version=2.0.0&source=[http://MyServer:8080/deegree? 080 * service=WFS&version=1.1.0&request=GetCapabilities]&resourceType=service& 081 * resourceFormat=text/xml&responseHandler=mailto:info@lat-lon.de&harvestInterval=P2W 082 * </p> 083 * <p> 084 * value in brackets [..] must be URL encoded and send without brackets! 085 * </p> 086 * <p> 087 * This is not absolutly compliant to OGc CSW 2.0.0 specification but Harvest definition as 088 * available from the spec is to limited because it just targets single metadata documents. 089 * </p> 090 * 091 * @version $Revision: 9345 $ 092 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a> 093 * @author last edited by: $Author: apoth $ 094 * 095 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $ 096 * 097 * @since 2.0 098 */ 099 public class ServiceHarvester extends AbstractHarvester { 100 101 private static final ILogger LOG = LoggerFactory.getLogger( ServiceHarvester.class ); 102 103 private static final URL xslt = ServiceHarvester.class.getResource( "harvestservice.xsl" ); 104 105 private static ServiceHarvester sh = null; 106 107 /** 108 * singelton 109 * 110 * @return 111 */ 112 public static ServiceHarvester getInstance() { 113 if ( sh == null ) { 114 sh = new ServiceHarvester(); 115 } 116 return sh; 117 } 118 119 @Override 120 public void run() { 121 LOG.logDebug( "starting harvest iteration for ServiceHarvester." ); 122 try { 123 HarvestRepository repository = HarvestRepository.getInstance(); 124 125 List<URI> sources = repository.getSources(); 126 for ( Iterator iter = sources.iterator(); iter.hasNext(); ) { 127 URI source = (URI) iter.next(); 128 try { 129 // determine if source shall be harvested 130 if ( shallHarvest( source, ResourceType.service ) ) { 131 inProgress.add( source ); 132 HarvestProcessor processor = new HarvestProcessor( this, source ); 133 processor.start(); 134 } 135 } catch ( Exception e ) { 136 LOG.logError( "Exception harvesting service: " + source, e ); 137 informResponseHandlers( source, e ); 138 } 139 } 140 } catch ( Exception e ) { 141 LOG.logError( "generell Exception harvesting services", e ); 142 } 143 144 } 145 146 /** 147 * inner class for processing asynchronous harvesting of a service 148 * 149 * @version $Revision: 9345 $ 150 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a> 151 * @author last edited by: $Author: apoth $ 152 * 153 * @version 1.0. $Revision: 9345 $, $Date: 2007-12-27 17:22:25 +0100 (Do, 27 Dez 2007) $ 154 * 155 * @since 2.0 156 */ 157 protected class HarvestProcessor extends AbstractHarvestProcessor { 158 159 HarvestProcessor( AbstractHarvester owner, URI source ) { 160 super( owner, source ); 161 } 162 163 @Override 164 public void run() { 165 try { 166 HarvestRepository repository = HarvestRepository.getInstance(); 167 XMLFragment capabilities = accessSourceCapabilities( source ); 168 Date harvestingTimestamp = repository.getNextHarvestingTimestamp( source ); 169 XMLFragment metaData = transformCapabilities( capabilities ); 170 String trans = null; 171 if ( repository.getLastHarvestingTimestamp( source ) == null ) { 172 trans = createInsertRequest( metaData ); 173 } else { 174 trans = createUpdateRequest( getID( metaData ), 175 "smXML:fileIdentifier/smXML:CharacterString", 176 metaData ); 177 } 178 performTransaction( trans ); 179 // update timestamps just if transaction has been performed 180 // successfully 181 writeLastHarvestingTimestamp( source, harvestingTimestamp ); 182 writeNextHarvestingTimestamp( source, harvestingTimestamp ); 183 informResponseHandlers( source ); 184 } catch ( Exception e ) { 185 LOG.logError( "could not perform harvest operation for source: " + source, e ); 186 try { 187 owner.informResponseHandlers( source, e ); 188 } catch ( Exception ee ) { 189 ee.printStackTrace(); 190 } 191 } 192 } 193 194 private String getID( XMLFragment metaData ) 195 throws XMLParsingException { 196 String xpath = "smXML:fileIdentifier/smXML:CharacterString"; 197 String fileIdentifier = XMLTools.getRequiredNodeAsString( metaData.getRootElement(), 198 xpath, nsc ); 199 return fileIdentifier; 200 } 201 202 @Override 203 protected String createConstraint( String fileIdentifier, String xPath ) { 204 throw new UnsupportedOperationException(); 205 } 206 207 /** 208 * transforms a OWS capabilities document into the desired target format 209 * 210 * @param xml 211 * @return 212 * @throws IOException 213 * @throws SAXException 214 * @throws TransformerException 215 */ 216 private XMLFragment transformCapabilities( XMLFragment xml ) 217 throws IOException, SAXException, TransformerException { 218 219 XSLTDocument xsltDoc = new XSLTDocument(); 220 xsltDoc.load( xslt ); 221 222 return xsltDoc.transform( xml ); 223 } 224 225 /** 226 * returns the capabilities of 227 * 228 * @param source 229 * @return 230 * @throws IOException 231 * @throws SAXException 232 */ 233 private XMLFragment accessSourceCapabilities( URI source ) 234 throws IOException, SAXException { 235 236 URL url = source.toURL(); 237 XMLFragment xml = new XMLFragment(); 238 xml.load( url ); 239 return xml; 240 } 241 242 } 243 244 }