Added support for more geo tags

This commit is contained in:
Morten Frederiksen 2008-08-11 09:30:28 +02:00
parent 935f4107cb
commit 149b702c88
3 changed files with 187 additions and 1 deletions

0
expunge.py Normal file → Executable file
View File

View File

@ -186,6 +186,18 @@ def content(xentry, name, detail, bozo):
xentry.appendChild(xcontent)
def location(xentry, long, lat):
    """ insert geo location into the entry

    Adds a geo:lat and a geo:long child element to xentry, each formatted
    with '%f' and carrying the xmlns:geo declaration for the W3C WGS84
    vocabulary.  Nothing is added when either coordinate is None.
    """
    # Test against None instead of truthiness: 0.0 is a valid latitude
    # (equator) and a valid longitude (prime meridian) and must be kept.
    if lat is None or long is None: return

    geo_ns = 'http://www.w3.org/2003/01/geo/wgs84_pos#'

    xlat = createTextElement(xentry, 'geo:lat', '%f' % lat)
    xlat.setAttribute('xmlns:geo', geo_ns)
    xlong = createTextElement(xentry, 'geo:long', '%f' % long)
    xlong.setAttribute('xmlns:geo', geo_ns)

    xentry.appendChild(xlat)
    xentry.appendChild(xlong)
def source(xsource, source, bozo, format):
""" copy source information to the entry """
xdoc = xsource.ownerDocument
@ -259,6 +271,21 @@ def reconstitute(feed, entry):
entry['%s_%s' % (ns,name.lower())])
xoriglink.setAttribute('xmlns:%s' % ns, feed.namespaces[ns])
# geo location
if entry.has_key('where') and \
entry.get('where',[]).has_key('type') and \
entry.get('where',[]).has_key('coordinates'):
where = entry.get('where',[])
type = where.get('type',None)
coordinates = where.get('coordinates',None)
if type == 'Point':
location(xentry, coordinates[0], coordinates[1])
elif type == 'Box' or type == 'LineString' or type == 'Polygon':
location(xentry, coordinates[0][0], coordinates[0][1])
if entry.has_key('geo_lat') and \
entry.has_key('geo_long'):
location(xentry, (float)(entry.get('geo_long',None)), (float)(entry.get('geo_lat',None)))
# author / contributor
author_detail = entry.get('author_detail',{})
if author_detail and not author_detail.has_key('name') and \

View File

@ -397,6 +397,8 @@ class _FeedParserMixin:
'http://freshmeat.net/rss/fm/': 'fm',
'http://xmlns.com/foaf/0.1/': 'foaf',
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
'http://www.georss.org/georss': 'georss',
'http://www.opengis.net/gml': 'gml',
'http://postneo.com/icbm/': 'icbm',
'http://purl.org/rss/1.0/modules/image/': 'image',
'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
@ -456,6 +458,10 @@ class _FeedParserMixin:
self.incontributor = 0
self.inpublisher = 0
self.insource = 0
# georss
self.ingeometry = 0
self.sourcedata = FeedParserDict()
self.contentparams = FeedParserDict()
self._summaryKey = None
@ -1269,6 +1275,89 @@ class _FeedParserMixin:
def _end_expirationdate(self):
    """Pop the 'expired' element, parse it with _parse_date and save the
    result under 'expired_parsed'."""
    expired_text = self.pop('expired')
    self._save('expired_parsed', _parse_date(expired_text))
# geospatial location, or "where", from georss.org
def _start_georssgeom(self, attrsD):
    """Start handler shared by the simple GeoRSS geometry elements.

    Pushes a 'geometry' element with a second argument of 0.  attrsD is
    accepted for the handler signature but is not used.
    """
    self.push('geometry', 0)

# georss:point, georss:line, georss:polygon and georss:box all start alike
_start_georss_point = _start_georss_line = _start_georssgeom
_start_georss_polygon = _start_georss_box = _start_georssgeom
def _save_where(self, geometry):
    """Store geometry, wrapped in a FeedParserDict, as the 'where' value of
    the current context (self._getContext())."""
    ctx = self._getContext()
    # make sure the key exists, then replace whatever is there
    ctx.setdefault('where', {})
    ctx['where'] = FeedParserDict(geometry)
def _end_georss_point(self):
    """Pop the 'geometry' text, parse it as a point and save it as 'where'."""
    point_text = self.pop('geometry')
    point = _parse_georss_point(point_text)
    self._save_where(point)
def _end_georss_line(self):
    """Pop the 'geometry' text, parse it as a line and save it as 'where'."""
    line_text = self.pop('geometry')
    line = _parse_georss_line(line_text)
    self._save_where(line)
def _end_georss_polygon(self):
    """Pop the 'geometry' text, parse it as a polygon and save it as 'where'."""
    polygon = _parse_georss_polygon(self.pop('geometry'))
    self._save_where(polygon)
def _end_georss_box(self):
    """Pop the 'geometry' text, parse it as a box and save it as 'where'."""
    box_text = self.pop('geometry')
    box = _parse_georss_box(box_text)
    self._save_where(box)
def _start_where(self, attrsD):
    """Start handler for a 'where' container: pushes 'where' with a second
    argument of 0.  attrsD is not used."""
    self.push('where', 0)

# georss:where is handled exactly like where
_start_georss_where = _start_where
def _start_gml_point(self, attrsD):
    """Start of gml:Point: record 'point' in self.ingeometry, then push a
    'geometry' element.  attrsD is not used."""
    self.ingeometry = 'point'
    self.push('geometry', 0)
def _start_gml_linestring(self, attrsD):
    """Start of gml:LineString: record 'linestring' in self.ingeometry, then
    push a 'geometry' element.  attrsD is not used."""
    self.ingeometry = 'linestring'
    self.push('geometry', 0)
def _start_gml_polygon(self, attrsD):
    """Start of gml:Polygon: push a 'geometry' element.

    self.ingeometry is left untouched here; in this section only the
    point, linestring and linearring start handlers assign it.
    """
    self.push('geometry', 0)
def _start_gml_exterior(self, attrsD):
    """Start of gml:exterior: push a 'geometry' element.  attrsD is not
    used and self.ingeometry is not changed."""
    self.push('geometry', 0)
def _start_gml_linearring(self, attrsD):
    """Start of gml:LinearRing: record 'polygon' in self.ingeometry (the
    value _parse_poslist checks for a polygon ring), then push a 'geometry'
    element.  attrsD is not used."""
    self.ingeometry = 'polygon'
    self.push('geometry', 0)
def _start_gml_pos(self, attrsD):
    """Start of gml:pos: push a 'pos' element.  attrsD is not used."""
    self.push('pos', 0)
def _end_gml_pos(self):
    """End of gml:pos: pop the 'pos' text, parse it as a point and save it
    as 'where'."""
    point = _parse_georss_point(self.pop('pos'))
    self._save_where(point)
def _start_gml_poslist(self, attrsD):
    """Start of gml:posList: push a 'pos' element (the same element name
    gml:pos uses).  attrsD is not used."""
    self.push('pos', 0)
def _end_gml_poslist(self):
    """End of gml:posList: pop the 'pos' text, parse it according to the
    geometry recorded in self.ingeometry and save it as 'where'."""
    pos_text = self.pop('pos')
    parsed = _parse_poslist(pos_text, self.ingeometry)
    self._save_where(parsed)
def _end_geom(self):
    """End handler shared by the GML geometry elements: reset
    self.ingeometry to 0 and pop the 'geometry' element, discarding the
    popped value."""
    self.ingeometry = 0
    self.pop('geometry')

# every GML geometry element closes the same way
_end_gml_point = _end_gml_linestring = _end_geom
_end_gml_linearring = _end_gml_exterior = _end_geom
_end_gml_polygon = _end_geom
def _end_where(self):
    """End of a 'where' container: pop the 'where' element and discard the
    popped value."""
    self.pop('where')

# georss:where is handled exactly like where
_end_georss_where = _end_where
# end geospatial
def _start_cc_license(self, attrsD):
context = self._getContext()
value = self._getAttribute(attrsD, 'rdf:resource')
@ -3336,7 +3425,77 @@ def _stripDoctype(data):
data = doctype_pattern.sub(replacement, head) + data
return version, data, dict(replacement and safe_pattern.findall(replacement))
# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
# keys, or None in the case of a parsing error.
def _parse_poslist(value, geom_type):
    """Parse the text of a gml:posList for the enclosing geometry.

    value is handed to _parse_georss_line; geom_type is 'linestring' or
    'polygon'.

    Returns the LineString dict for 'linestring', a Polygon dict whose
    single ring is the parsed line's coordinates for 'polygon', or None
    when the coordinate text cannot be parsed.  Raises ValueError for any
    other geom_type.
    """
    if geom_type == 'linestring':
        return _parse_georss_line(value)
    elif geom_type == 'polygon':
        ring = _parse_georss_line(value)
        # _parse_georss_line returns None on a parse error; hand that on
        # instead of failing with a TypeError on ring['coordinates'].
        if ring is None:
            return None
        return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
    else:
        # call form of raise is accepted by both Python 2 and Python 3
        raise ValueError("unsupported geometry type: %s" % geom_type)
# Point coordinates are a 2-tuple (lon, lat)
def _parse_georss_point(value):
    """Parse "lat lon" text (space and/or comma separated) into a Point.

    Returns {'type': 'Point', 'coordinates': (lon, lat)} with float
    coordinates, or None when value cannot be parsed as exactly two
    numbers.  With _debug set, the failure is reported on sys.stderr.
    """
    try:
        lat, lon = value.replace(',', ' ').split()
        return {'type': 'Point', 'coordinates': (float(lon), float(lat))}
    except Exception:
        # sys.exc_info() rather than "except Exception, e" so the block
        # parses on both Python 2 and Python 3.
        if _debug:
            # The old message referenced an undefined 'handler' and gave two
            # arguments to a single %s, so debug mode itself raised here.
            sys.stderr.write('_parse_georss_point raised %s\n' % repr(sys.exc_info()[1]))
    return None
# Line coordinates are a tuple of 2-tuples ((lon0, lat0), ... (lonN, latN))
def _parse_georss_line(value):
    """Parse "lat lon lat lon ..." text (space and/or comma separated) into
    a LineString.

    Returns {'type': 'LineString', 'coordinates': ((lon, lat), ...)} with
    float coordinates, or None when value is empty, holds an odd number of
    values, or contains a non-numeric value.  With _debug set, the failure
    is reported on sys.stderr.
    """
    try:
        latlons = value.replace(',', ' ').split()
        # An empty list used to yield a LineString with no coordinates,
        # which consumers indexing coordinates[0] cannot handle; treat it as
        # a parse error like any other malformed input.
        if not latlons:
            raise ValueError('no coordinates in %r' % (value,))
        coords = []
        for i in range(0, len(latlons), 2):
            lat = float(latlons[i])
            lon = float(latlons[i+1])  # IndexError on an odd count
            coords.append((lon, lat))
        return {'type': 'LineString', 'coordinates': tuple(coords)}
    except Exception:
        # sys.exc_info() rather than "except Exception, e" so the block
        # parses on both Python 2 and Python 3.
        if _debug:
            sys.stderr.write('_parse_georss_line raised %s\n' % repr(sys.exc_info()[1]))
    return None
# Polygon coordinates are a tuple of closed LineString tuples. The first item
# in the tuple is the exterior ring. Subsequent items are interior rings, but
# georss:polygon elements usually have no interior rings.
def _parse_georss_polygon(value):
    """Parse "lat lon lat lon ..." text (space and/or comma separated) into
    a Polygon with a single ring.

    Returns {'type': 'Polygon', 'coordinates': (((lon, lat), ...),)} with
    float coordinates, or None when value is empty, holds an odd number of
    values, or contains a non-numeric value.  The ring is not checked for
    being closed.  With _debug set, the failure is reported on sys.stderr.
    """
    try:
        latlons = value.replace(',', ' ').split()
        # An empty list used to yield a Polygon whose only ring is empty,
        # which consumers indexing coordinates[0][0] cannot handle; treat it
        # as a parse error like any other malformed input.
        if not latlons:
            raise ValueError('no coordinates in %r' % (value,))
        coords = []
        for i in range(0, len(latlons), 2):
            lat = float(latlons[i])
            lon = float(latlons[i+1])  # IndexError on an odd count
            coords.append((lon, lat))
        return {'type': 'Polygon', 'coordinates': (tuple(coords),)}
    except Exception:
        # sys.exc_info() rather than "except Exception, e" so the block
        # parses on both Python 2 and Python 3.
        if _debug:
            sys.stderr.write('_parse_georss_polygon raised %s\n' % repr(sys.exc_info()[1]))
    return None
# Box coordinates are a 2-tuple of 2-tuples ((lon_ll, lat_ll), (lon_ur, lat_ur))
def _parse_georss_box(value):
    """Parse "lat lon lat lon" text (space and/or comma separated) into a Box.

    Returns {'type': 'Box', 'coordinates': ((lon, lat), (lon, lat))} built
    from the first four values, or None when fewer than four values are
    present or any value is not a number.  With _debug set, the failure is
    reported on sys.stderr.
    """
    try:
        numbers = []
        for token in value.replace(',', ' ').split():
            numbers.append(float(token))
        # input order is lat, lon; output order is lon, lat
        lower_left = (numbers[1], numbers[0])
        upper_right = (numbers[3], numbers[2])
        return {'type': 'Box', 'coordinates': (lower_left, upper_right)}
    except Exception:
        if _debug:
            sys.stderr.write('_parse_georss_box raised %s\n' % repr(sys.exc_info()[1]))
    return None
# end geospatial parsers
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]):
'''Parse a feed from a URL, file, stream, or string'''
result = FeedParserDict()