Added support for more geo tags
This commit is contained in:
parent
935f4107cb
commit
149b702c88
0
expunge.py
Normal file → Executable file
0
expunge.py
Normal file → Executable file
@ -186,6 +186,18 @@ def content(xentry, name, detail, bozo):
|
||||
|
||||
xentry.appendChild(xcontent)
|
||||
|
||||
def location(xentry, long, lat):
    """ insert geo location into the entry

    Adds a geo:lat and a geo:long child element to xentry, each declaring
    the 'geo' namespace prefix.  Nothing is added when either coordinate
    is missing (None or an empty string).
    """
    # Test for "missing" explicitly instead of relying on truthiness:
    # 0.0 is a valid latitude (equator) and longitude (prime meridian).
    if lat in (None, '') or long in (None, ''): return

    geo_ns = 'http://www.w3.org/2003/01/geo/wgs84_pos#'

    xlat = createTextElement(xentry, '%s:%s' % ('geo','lat'), '%f' % lat)
    xlat.setAttribute('xmlns:%s' % 'geo', geo_ns)
    xlong = createTextElement(xentry, '%s:%s' % ('geo','long'), '%f' % long)
    xlong.setAttribute('xmlns:%s' % 'geo', geo_ns)

    xentry.appendChild(xlat)
    xentry.appendChild(xlong)
|
||||
|
||||
def source(xsource, source, bozo, format):
|
||||
""" copy source information to the entry """
|
||||
xdoc = xsource.ownerDocument
|
||||
@ -259,6 +271,21 @@ def reconstitute(feed, entry):
|
||||
entry['%s_%s' % (ns,name.lower())])
|
||||
xoriglink.setAttribute('xmlns:%s' % ns, feed.namespaces[ns])
|
||||
|
||||
# geo location
# 'where' is the parsed geometry: a dict-like object with a 'type' and a
# 'coordinates' key.  Coordinates are (lon, lat) pairs.
where = entry.get('where', None)
if where and where.has_key('type') and where.has_key('coordinates'):
    geom_type = where.get('type', None)
    coordinates = where.get('coordinates', None)
    point = None
    try:
        if geom_type == 'Point':
            point = (coordinates[0], coordinates[1])
        elif geom_type == 'Box' or geom_type == 'LineString':
            # first corner / first vertex
            point = (coordinates[0][0], coordinates[0][1])
        elif geom_type == 'Polygon':
            # a polygon is a tuple of rings, each ring a tuple of pairs:
            # use the first vertex of the first (exterior) ring
            point = (coordinates[0][0][0], coordinates[0][0][1])
    except (IndexError, TypeError):
        # empty or malformed geometry: emit no location instead of
        # aborting the whole entry
        point = None
    if point is not None:
        location(xentry, point[0], point[1])
if entry.has_key('geo_lat') and \
    entry.has_key('geo_long'):
    try:
        geo_long = float(entry.get('geo_long',None))
        geo_lat = float(entry.get('geo_lat',None))
    except (TypeError, ValueError):
        # non-numeric feed data; location() ignores missing coordinates
        geo_long = geo_lat = None
    location(xentry, geo_long, geo_lat)
|
||||
|
||||
# author / contributor
|
||||
author_detail = entry.get('author_detail',{})
|
||||
if author_detail and not author_detail.has_key('name') and \
|
||||
|
161
planet/vendor/feedparser.py
vendored
161
planet/vendor/feedparser.py
vendored
@ -397,6 +397,8 @@ class _FeedParserMixin:
|
||||
'http://freshmeat.net/rss/fm/': 'fm',
|
||||
'http://xmlns.com/foaf/0.1/': 'foaf',
|
||||
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
|
||||
'http://www.georss.org/georss': 'georss',
|
||||
'http://www.opengis.net/gml': 'gml',
|
||||
'http://postneo.com/icbm/': 'icbm',
|
||||
'http://purl.org/rss/1.0/modules/image/': 'image',
|
||||
'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
|
||||
@ -456,6 +458,10 @@ class _FeedParserMixin:
|
||||
self.incontributor = 0
|
||||
self.inpublisher = 0
|
||||
self.insource = 0
|
||||
|
||||
# georss
|
||||
self.ingeometry = 0
|
||||
|
||||
self.sourcedata = FeedParserDict()
|
||||
self.contentparams = FeedParserDict()
|
||||
self._summaryKey = None
|
||||
@ -1269,6 +1275,89 @@ class _FeedParserMixin:
|
||||
def _end_expirationdate(self):
    '''Save the date parsed from the popped 'expired' value as 'expired_parsed'.'''
    expired = self.pop('expired')
    parsed = _parse_date(expired)
    self._save('expired_parsed', parsed)
|
||||
|
||||
# geospatial location, or "where", from georss.org
|
||||
|
||||
def _start_georssgeom(self, attrsD):
    '''Shared start handler for the simple GeoRSS geometry elements.

    attrsD is accepted for the handler signature but not used.
    '''
    self.push('geometry', 0)

# every simple geometry element starts the same way
_start_georss_point = _start_georssgeom
_start_georss_line = _start_georssgeom
_start_georss_polygon = _start_georssgeom
_start_georss_box = _start_georssgeom
|
||||
|
||||
def _save_where(self, geometry):
    '''Store a parsed geometry as the 'where' value of the current context.

    geometry is a dict with 'type' and 'coordinates' keys, or None when
    the geometry text could not be parsed.  None is ignored, so a failed
    parse neither raises here nor replaces a previously saved geometry.
    '''
    if geometry is None:
        return
    context = self._getContext()
    context['where'] = FeedParserDict(geometry)
|
||||
|
||||
def _end_georss_point(self):
    '''Parse the popped 'geometry' text as a point and save it as 'where'.'''
    text = self.pop('geometry')
    self._save_where(_parse_georss_point(text))
|
||||
|
||||
def _end_georss_line(self):
    '''Parse the popped 'geometry' text as a line and save it as 'where'.'''
    text = self.pop('geometry')
    self._save_where(_parse_georss_line(text))
|
||||
|
||||
def _end_georss_polygon(self):
    '''Parse the popped 'geometry' text as a polygon and save it as 'where'.'''
    self._save_where(_parse_georss_polygon(self.pop('geometry')))
|
||||
|
||||
def _end_georss_box(self):
    '''Parse the popped 'geometry' text as a box and save it as 'where'.'''
    text = self.pop('geometry')
    self._save_where(_parse_georss_box(text))
|
||||
|
||||
def _start_where(self, attrsD):
    '''Start handler for a 'where' element; attrsD is not used.'''
    self.push('where', 0)

# the georss-prefixed element shares the handler
_start_georss_where = _start_where
|
||||
|
||||
def _start_gml_point(self, attrsD):
    '''Mark the geometry in progress as a point and push 'geometry'.'''
    # remembered so that nested position handlers know the geometry kind
    self.ingeometry = 'point'
    self.push('geometry', 0)
|
||||
|
||||
def _start_gml_linestring(self, attrsD):
    '''Mark the geometry in progress as a linestring and push 'geometry'.'''
    # read back by _end_gml_poslist to choose the coordinate parser
    self.ingeometry = 'linestring'
    self.push('geometry', 0)
|
||||
|
||||
def _start_gml_polygon(self, attrsD):
    '''Push 'geometry' for a polygon element.

    The geometry kind is not set here; _start_gml_linearring sets it.
    '''
    self.push('geometry', 0)
|
||||
|
||||
def _start_gml_exterior(self, attrsD):
    '''Push 'geometry' for a polygon's exterior element; attrsD is not used.'''
    self.push('geometry', 0)
|
||||
|
||||
def _start_gml_linearring(self, attrsD):
    '''Mark the geometry in progress as a polygon and push 'geometry'.'''
    # read back by _end_gml_poslist to choose the coordinate parser
    self.ingeometry = 'polygon'
    self.push('geometry', 0)
|
||||
|
||||
def _start_gml_pos(self, attrsD):
    '''Push 'pos' for a single position element; attrsD is not used.'''
    self.push('pos', 0)
|
||||
|
||||
def _end_gml_pos(self):
    '''Parse the popped 'pos' text as a point and save it as 'where'.'''
    text = self.pop('pos')
    self._save_where(_parse_georss_point(text))
|
||||
|
||||
def _start_gml_poslist(self, attrsD):
    '''Push 'pos' for a position-list element; attrsD is not used.'''
    self.push('pos', 0)
|
||||
|
||||
def _end_gml_poslist(self):
    '''Parse the popped 'pos' text for the current geometry kind and save it.

    The kind is whatever self.ingeometry holds; _parse_poslist raises
    ValueError for kinds it does not support.
    '''
    text = self.pop('pos')
    kind = self.ingeometry
    self._save_where(_parse_poslist(text, kind))
|
||||
|
||||
def _end_geom(self):
    '''Shared end handler: clear the geometry kind and pop 'geometry'.'''
    self.ingeometry = 0
    self.pop('geometry')

# every gml geometry element that pushed 'geometry' ends the same way
_end_gml_point = _end_geom
_end_gml_linestring = _end_geom
_end_gml_linearring = _end_geom
_end_gml_exterior = _end_geom
_end_gml_polygon = _end_geom
|
||||
|
||||
def _end_where(self):
    '''End handler for a 'where' element: pop what _start_where pushed.'''
    self.pop('where')

# the georss-prefixed element shares the handler
_end_georss_where = _end_where
|
||||
|
||||
# end geospatial
|
||||
|
||||
def _start_cc_license(self, attrsD):
|
||||
context = self._getContext()
|
||||
value = self._getAttribute(attrsD, 'rdf:resource')
|
||||
@ -3336,7 +3425,77 @@ def _stripDoctype(data):
|
||||
data = doctype_pattern.sub(replacement, head) + data
|
||||
|
||||
return version, data, dict(replacement and safe_pattern.findall(replacement))
|
||||
|
||||
|
||||
# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
|
||||
# keys, or None in the case of a parsing error
|
||||
|
||||
def _parse_poslist(value, geom_type):
    '''Parse position-list text for the given geometry kind.

    value is the coordinate text; geom_type is 'linestring' or 'polygon'.
    Returns a dict with 'type' and 'coordinates' keys, or None when the
    text cannot be parsed.  Raises ValueError for any other geom_type.
    '''
    if geom_type == 'linestring':
        return _parse_georss_line(value)
    elif geom_type == 'polygon':
        ring = _parse_georss_line(value)
        if ring is None:
            # the line parser reports a parse error as None; pass that on
            # instead of failing on ring['coordinates']
            return None
        return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
    else:
        raise ValueError("unsupported geometry type: %s" % geom_type)
|
||||
|
||||
# Point coordinates are a 2-tuple (lon, lat)
|
||||
def _parse_georss_point(value):
    '''Parse "lat lon" text (comma or whitespace separated) into a point.

    Returns {'type': 'Point', 'coordinates': (lon, lat)} with float
    coordinates, or None when value is not exactly two numbers.
    '''
    try:
        lat, lon = value.replace(',', ' ').split()
        return {'type': 'Point', 'coordinates': (float(lon), float(lat))}
    except Exception:
        # Best effort: any malformed value yields None.  sys.exc_info()
        # fetches the exception without version-specific except syntax.
        if _debug:
            sys.stderr.write('_parse_georss_point raised %s\n' % repr(sys.exc_info()[1]))
    return None
|
||||
|
||||
# Line coordinates are a tuple of 2-tuples ((lon0, lat0), ... (lonN, latN))
|
||||
def _parse_georss_line(value):
    '''Parse "lat lon lat lon ..." text into a line.

    Values may be separated by commas or whitespace.  Returns
    {'type': 'LineString', 'coordinates': ((lon0, lat0), ...)} with float
    coordinates, or None when the text is empty, holds an odd number of
    values, or contains a non-numeric value.
    '''
    try:
        numbers = [float(x) for x in value.replace(',', ' ').split()]
        # An empty list would give a geometry without any vertex, which
        # consumers index into; report it as a parse error instead.
        if not numbers or len(numbers) % 2:
            raise ValueError('expected lat/lon pairs, got %d values' % len(numbers))
        # input order is lat, lon; stored order is (lon, lat)
        coords = tuple(zip(numbers[1::2], numbers[0::2]))
        return {'type': 'LineString', 'coordinates': coords}
    except Exception:
        # Best effort: any malformed value yields None.
        if _debug:
            sys.stderr.write('_parse_georss_line raised %s\n' % repr(sys.exc_info()[1]))
    return None
|
||||
|
||||
# Polygon coordinates are a tuple of closed LineString tuples. The first item
|
||||
# in the tuple is the exterior ring. Subsequent items are interior rings, but
|
||||
# georss:polygon elements usually have no interior rings.
|
||||
def _parse_georss_polygon(value):
    '''Parse "lat lon lat lon ..." text into a polygon with a single ring.

    Values may be separated by commas or whitespace.  Returns
    {'type': 'Polygon', 'coordinates': (ring,)} where ring is a tuple of
    (lon, lat) float pairs, or None when the text is empty, holds an odd
    number of values, or contains a non-numeric value.
    '''
    try:
        numbers = [float(x) for x in value.replace(',', ' ').split()]
        # An empty list would give a ring without any vertex, which
        # consumers index into; report it as a parse error instead.
        if not numbers or len(numbers) % 2:
            raise ValueError('expected lat/lon pairs, got %d values' % len(numbers))
        # input order is lat, lon; stored order is (lon, lat)
        ring = tuple(zip(numbers[1::2], numbers[0::2]))
        return {'type': 'Polygon', 'coordinates': (ring,)}
    except Exception:
        # Best effort: any malformed value yields None.
        if _debug:
            sys.stderr.write('_parse_georss_polygon raised %s\n' % repr(sys.exc_info()[1]))
    return None
|
||||
|
||||
# Box coordinates are a 2-tuple of 2-tuples ((lon_ll, lat_ll), (lon_ur, lat_ur))
|
||||
def _parse_georss_box(value):
    '''Parse "lat_ll lon_ll lat_ur lon_ur" text into a box.

    Values may be separated by commas or whitespace.  Returns
    {'type': 'Box', 'coordinates': ((lon_ll, lat_ll), (lon_ur, lat_ur))}
    with float coordinates, or None when the text is not exactly four
    numbers.
    '''
    try:
        vals = [float(x) for x in value.replace(',', ' ').split()]
        # a box is two corner pairs; anything else is malformed rather
        # than something to truncate silently
        if len(vals) != 4:
            raise ValueError('expected 4 values, got %d' % len(vals))
        return {'type': 'Box', 'coordinates': ((vals[1], vals[0]), (vals[3], vals[2]))}
    except Exception:
        # Best effort: any malformed value yields None.
        if _debug:
            sys.stderr.write('_parse_georss_box raised %s\n' % repr(sys.exc_info()[1]))
    return None
|
||||
|
||||
# end geospatial parsers
|
||||
|
||||
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]):
|
||||
'''Parse a feed from a URL, file, stream, or string'''
|
||||
result = FeedParserDict()
|
||||
|
Loading…
Reference in New Issue
Block a user