diff --git a/expunge.py b/expunge.py
old mode 100644
new mode 100755
diff --git a/planet/reconstitute.py b/planet/reconstitute.py
index 88f2bfb..37d954d 100644
--- a/planet/reconstitute.py
+++ b/planet/reconstitute.py
@@ -186,6 +186,23 @@ def content(xentry, name, detail, bozo):
 
     xentry.appendChild(xcontent)
 
+def location(xentry, long, lat):
+    """ insert geo location into the entry
+
+    Creates geo:lat and geo:long text elements via createTextElement,
+    declares the geo namespace on each and appends them to xentry.
+    Nothing is inserted when either value is None; zero is a valid
+    coordinate (equator / prime meridian) and is kept. """
+    if lat is None or long is None: return
+
+    xlat = createTextElement(xentry, '%s:%s' % ('geo','lat'), '%f' % lat)
+    xlat.setAttribute('xmlns:%s' % 'geo', 'http://www.w3.org/2003/01/geo/wgs84_pos#')
+    xlong = createTextElement(xentry, '%s:%s' % ('geo','long'), '%f' % long)
+    xlong.setAttribute('xmlns:%s' % 'geo', 'http://www.w3.org/2003/01/geo/wgs84_pos#')
+
+    xentry.appendChild(xlat)
+    xentry.appendChild(xlong)
+
 def source(xsource, source, bozo, format):
     """ copy source information to the entry """
     xdoc = xsource.ownerDocument
@@ -259,6 +276,31 @@ def reconstitute(feed, entry):
                 entry['%s_%s' % (ns,name.lower())])
             xoriglink.setAttribute('xmlns:%s' % ns, feed.namespaces[ns])
 
+    # geo location
+    if entry.has_key('where') and \
+        entry.get('where',[]).has_key('type') and \
+        entry.get('where',[]).has_key('coordinates'):
+        where = entry.get('where',[])
+        geom_type = where.get('type',None)
+        coordinates = where.get('coordinates',None)
+        if geom_type == 'Point':
+            location(xentry, coordinates[0], coordinates[1])
+        elif geom_type == 'Box' or geom_type == 'LineString':
+            location(xentry, coordinates[0][0], coordinates[0][1])
+        elif geom_type == 'Polygon':
+            # polygon coordinates are a tuple of rings, so the first
+            # vertex of the exterior ring is one level deeper
+            location(xentry, coordinates[0][0][0], coordinates[0][0][1])
+    if entry.has_key('geo_lat') and \
+        entry.has_key('geo_long'):
+        try:
+            geo_long = float(entry['geo_long'])
+            geo_lat = float(entry['geo_lat'])
+        except (TypeError, ValueError):
+            pass # the feed supplied something that is not a number
+        else:
+            location(xentry, geo_long, geo_lat)
+
     # author / contributor
     author_detail = entry.get('author_detail',{})
     if author_detail and not author_detail.has_key('name') and \
diff --git a/planet/vendor/feedparser.py b/planet/vendor/feedparser.py
index a232f43..f8b91e2 100755
--- a/planet/vendor/feedparser.py
+++ b/planet/vendor/feedparser.py
@@ -397,6 +397,8 @@ class _FeedParserMixin:
                   'http://freshmeat.net/rss/fm/': 'fm',
                   'http://xmlns.com/foaf/0.1/': 'foaf',
                   'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
+                  'http://www.georss.org/georss': 'georss',
+                  'http://www.opengis.net/gml': 'gml',
                   'http://postneo.com/icbm/': 'icbm',
                   'http://purl.org/rss/1.0/modules/image/': 'image',
                   'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
@@ -456,6 +458,10 @@ class _FeedParserMixin:
         self.incontributor = 0
         self.inpublisher = 0
         self.insource = 0
+
+        # georss
+        self.ingeometry = 0
+
         self.sourcedata = FeedParserDict()
         self.contentparams = FeedParserDict()
         self._summaryKey = None
@@ -1269,6 +1275,92 @@ class _FeedParserMixin:
     def _end_expirationdate(self):
         self._save('expired_parsed', _parse_date(self.pop('expired')))
 
+    # geospatial location, or "where", from georss.org
+
+    def _start_georssgeom(self, attrsD):
+        self.push('geometry', 0)
+    _start_georss_point = _start_georssgeom
+    _start_georss_line = _start_georssgeom
+    _start_georss_polygon = _start_georssgeom
+    _start_georss_box = _start_georssgeom
+
+    def _save_where(self, geometry):
+        # the _parse_* helpers return None for malformed input; keep any
+        # geometry saved earlier rather than clobbering it
+        if geometry is None:
+            return
+        context = self._getContext()
+        context['where'] = FeedParserDict(geometry)
+
+    def _end_georss_point(self):
+        geometry = _parse_georss_point(self.pop('geometry'))
+        self._save_where(geometry)
+
+    def _end_georss_line(self):
+        geometry = _parse_georss_line(self.pop('geometry'))
+        self._save_where(geometry)
+
+    def _end_georss_polygon(self):
+        this = self.pop('geometry')
+        geometry = _parse_georss_polygon(this)
+        self._save_where(geometry)
+
+    def _end_georss_box(self):
+        geometry = _parse_georss_box(self.pop('geometry'))
+        self._save_where(geometry)
+
+    def _start_where(self, attrsD):
+        self.push('where', 0)
+    _start_georss_where = _start_where
+
+    def _start_gml_point(self, attrsD):
+        self.ingeometry = 'point'
+        self.push('geometry', 0)
+
+    def _start_gml_linestring(self, attrsD):
+        self.ingeometry = 'linestring'
+        self.push('geometry', 0)
+
+    def _start_gml_polygon(self, attrsD):
+        self.push('geometry', 0)
+
+    def _start_gml_exterior(self, attrsD):
+        self.push('geometry', 0)
+
+    def _start_gml_linearring(self, attrsD):
+        self.ingeometry = 'polygon'
+        self.push('geometry', 0)
+
+    def _start_gml_pos(self, attrsD):
+        self.push('pos', 0)
+
+    def _end_gml_pos(self):
+        this = self.pop('pos')
+        geometry = _parse_georss_point(this)
+        self._save_where(geometry)
+
+    def _start_gml_poslist(self, attrsD):
+        self.push('pos', 0)
+
+    def _end_gml_poslist(self):
+        geometry = _parse_poslist(self.pop('pos'), self.ingeometry)
+        self._save_where(geometry)
+
+    def _end_geom(self):
+        self.ingeometry = 0
+        self.pop('geometry')
+    _end_gml_point = _end_geom
+    _end_gml_linestring = _end_geom
+    _end_gml_linearring = _end_geom
+    _end_gml_exterior = _end_geom
+    _end_gml_polygon = _end_geom
+
+    def _end_where(self):
+        self.pop('where')
+    _end_georss_where = _end_where
+
+    # end geospatial
+
     def _start_cc_license(self, attrsD):
         context = self._getContext()
         value = self._getAttribute(attrsD, 'rdf:resource')
@@ -3336,7 +3428,81 @@ def _stripDoctype(data):
     data = doctype_pattern.sub(replacement, head) + data
 
     return version, data, dict(replacement and safe_pattern.findall(replacement))
-    
+
+# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
+# keys, or None in the case of a parsing error
+
+def _parse_poslist(value, geom_type):
+    if geom_type == 'linestring':
+        return _parse_georss_line(value)
+    elif geom_type == 'polygon':
+        ring = _parse_georss_line(value)
+        if ring is None:
+            return None
+        return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
+    else:
+        if _debug:
+            sys.stderr.write('_parse_poslist: unsupported geometry type %s\n' % repr(geom_type))
+        return None
+
+# Point coordinates are a 2-tuple (lon, lat)
+def _parse_georss_point(value):
+    try:
+        lat, lon = value.replace(',', ' ').split()
+        return {'type': 'Point', 'coordinates': (float(lon), float(lat))}
+    except Exception, e:
+        if _debug:
+            sys.stderr.write('_parse_georss_point raised %s\n' % repr(e))
+        pass
+    return None
+
+# Line coordinates are a tuple of 2-tuples ((lon0, lat0), ... (lonN, latN))
+def _parse_georss_line(value):
+    try:
+        latlons = value.replace(',', ' ').split()
+        coords = []
+        for i in range(0, len(latlons), 2):
+            lat = float(latlons[i])
+            lon = float(latlons[i+1])
+            coords.append((lon, lat))
+        return {'type': 'LineString', 'coordinates': tuple(coords)}
+    except Exception, e:
+        if _debug:
+            sys.stderr.write('_parse_georss_line raised %s\n' % repr(e))
+        pass
+    return None
+
+# Polygon coordinates are a tuple of closed LineString tuples. The first item
+# in the tuple is the exterior ring. Subsequent items are interior rings, but
+# georss:polygon elements usually have no interior rings.
+def _parse_georss_polygon(value):
+    try:
+        latlons = value.replace(',', ' ').split()
+        coords = []
+        for i in range(0, len(latlons), 2):
+            lat = float(latlons[i])
+            lon = float(latlons[i+1])
+            coords.append((lon, lat))
+        return {'type': 'Polygon', 'coordinates': (tuple(coords),)}
+    except Exception, e:
+        if _debug:
+            sys.stderr.write('_parse_georss_polygon raised %s\n' % repr(e))
+        pass
+    return None
+
+# Box coordinates are a 2-tuple of 2-tuples ((lon_ll, lat_ll), (lon_ur, lat_ur))
+def _parse_georss_box(value):
+    try:
+        vals = [float(x) for x in value.replace(',', ' ').split()]
+        return {'type': 'Box', 'coordinates': ((vals[1], vals[0]), (vals[3], vals[2]))}
+    except Exception, e:
+        if _debug:
+            sys.stderr.write('_parse_georss_box raised %s\n' % repr(e))
+        pass
+    return None
+
+# end geospatial parsers
+
 def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]):
     '''Parse a feed from a URL, file, stream, or string'''
     result = FeedParserDict()
diff --git a/tests/data/reconstitute/geo_latlong.xml b/tests/data/reconstitute/geo_latlong.xml
new file mode 100644
index 0000000..02d05e6
--- /dev/null
+++ b/tests/data/reconstitute/geo_latlong.xml
@@ -0,0 +1,13 @@
+
+
+
+
+ 40.711735
+ -74.012421
+
+
+