Added support for more geo tags
This commit is contained in:
parent
935f4107cb
commit
149b702c88
0
expunge.py
Normal file → Executable file
0
expunge.py
Normal file → Executable file
@ -186,6 +186,18 @@ def content(xentry, name, detail, bozo):
|
|||||||
|
|
||||||
xentry.appendChild(xcontent)
|
xentry.appendChild(xcontent)
|
||||||
|
|
||||||
|
def location(xentry, long, lat):
    """ insert geo location into the entry

    Adds a geo:lat and a geo:long child element (W3C WGS84 namespace) to
    xentry, each formatted with '%f'.

    xentry -- DOM element that receives the two children
    long   -- longitude as a number, or None when unknown
    lat    -- latitude as a number, or None when unknown

    Nothing is added when either coordinate is None.
    """
    # Compare against None rather than truthiness: 0.0 is a valid latitude
    # (equator) and a valid longitude (prime meridian) and must be emitted.
    if lat is None or long is None:
        return

    geo_ns = 'http://www.w3.org/2003/01/geo/wgs84_pos#'

    xlat = createTextElement(xentry, 'geo:lat', '%f' % lat)
    xlat.setAttribute('xmlns:geo', geo_ns)
    xlong = createTextElement(xentry, 'geo:long', '%f' % long)
    xlong.setAttribute('xmlns:geo', geo_ns)

    # NOTE(review): createTextElement may already attach the new node to
    # xentry -- confirm; the explicit appendChild calls are kept so the
    # resulting child order (lat, then long) stays as before.
    xentry.appendChild(xlat)
    xentry.appendChild(xlong)
|
||||||
|
|
||||||
def source(xsource, source, bozo, format):
|
def source(xsource, source, bozo, format):
|
||||||
""" copy source information to the entry """
|
""" copy source information to the entry """
|
||||||
xdoc = xsource.ownerDocument
|
xdoc = xsource.ownerDocument
|
||||||
@ -259,6 +271,21 @@ def reconstitute(feed, entry):
|
|||||||
entry['%s_%s' % (ns,name.lower())])
|
entry['%s_%s' % (ns,name.lower())])
|
||||||
xoriglink.setAttribute('xmlns:%s' % ns, feed.namespaces[ns])
|
xoriglink.setAttribute('xmlns:%s' % ns, feed.namespaces[ns])
|
||||||
|
|
||||||
|
# geo location
|
||||||
|
if entry.has_key('where') and \
|
||||||
|
entry.get('where',[]).has_key('type') and \
|
||||||
|
entry.get('where',[]).has_key('coordinates'):
|
||||||
|
where = entry.get('where',[])
|
||||||
|
type = where.get('type',None)
|
||||||
|
coordinates = where.get('coordinates',None)
|
||||||
|
if type == 'Point':
|
||||||
|
location(xentry, coordinates[0], coordinates[1])
|
||||||
|
elif type == 'Box' or type == 'LineString' or type == 'Polygon':
|
||||||
|
location(xentry, coordinates[0][0], coordinates[0][1])
|
||||||
|
if entry.has_key('geo_lat') and \
|
||||||
|
entry.has_key('geo_long'):
|
||||||
|
location(xentry, (float)(entry.get('geo_long',None)), (float)(entry.get('geo_lat',None)))
|
||||||
|
|
||||||
# author / contributor
|
# author / contributor
|
||||||
author_detail = entry.get('author_detail',{})
|
author_detail = entry.get('author_detail',{})
|
||||||
if author_detail and not author_detail.has_key('name') and \
|
if author_detail and not author_detail.has_key('name') and \
|
||||||
|
161
planet/vendor/feedparser.py
vendored
161
planet/vendor/feedparser.py
vendored
@ -397,6 +397,8 @@ class _FeedParserMixin:
|
|||||||
'http://freshmeat.net/rss/fm/': 'fm',
|
'http://freshmeat.net/rss/fm/': 'fm',
|
||||||
'http://xmlns.com/foaf/0.1/': 'foaf',
|
'http://xmlns.com/foaf/0.1/': 'foaf',
|
||||||
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
|
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
|
||||||
|
'http://www.georss.org/georss': 'georss',
|
||||||
|
'http://www.opengis.net/gml': 'gml',
|
||||||
'http://postneo.com/icbm/': 'icbm',
|
'http://postneo.com/icbm/': 'icbm',
|
||||||
'http://purl.org/rss/1.0/modules/image/': 'image',
|
'http://purl.org/rss/1.0/modules/image/': 'image',
|
||||||
'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
|
'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes',
|
||||||
@ -456,6 +458,10 @@ class _FeedParserMixin:
|
|||||||
self.incontributor = 0
|
self.incontributor = 0
|
||||||
self.inpublisher = 0
|
self.inpublisher = 0
|
||||||
self.insource = 0
|
self.insource = 0
|
||||||
|
|
||||||
|
# georss
|
||||||
|
self.ingeometry = 0
|
||||||
|
|
||||||
self.sourcedata = FeedParserDict()
|
self.sourcedata = FeedParserDict()
|
||||||
self.contentparams = FeedParserDict()
|
self.contentparams = FeedParserDict()
|
||||||
self._summaryKey = None
|
self._summaryKey = None
|
||||||
@ -1269,6 +1275,89 @@ class _FeedParserMixin:
|
|||||||
def _end_expirationdate(self):
    '''Pop the 'expired' element and save its parsed date as 'expired_parsed'.'''
    expired = self.pop('expired')
    self._save('expired_parsed', _parse_date(expired))
|
||||||
|
|
||||||
|
# geospatial location, or "where", from georss.org
|
||||||
|
|
||||||
|
def _start_georssgeom(self, attrsD):
    '''Shared start handler for the simple GeoRSS geometry elements.

    Pushes a 'geometry' element; attrsD is not used. The aliases below
    bind this handler to the point, line, polygon and box start tags.
    '''
    self.push('geometry', 0)

_start_georss_point = _start_georssgeom
_start_georss_line = _start_georssgeom
_start_georss_polygon = _start_georssgeom
_start_georss_box = _start_georssgeom
|
||||||
|
|
||||||
|
def _save_where(self, geometry):
|
||||||
|
context = self._getContext()
|
||||||
|
context.setdefault('where', {})
|
||||||
|
context['where'] = FeedParserDict(geometry)
|
||||||
|
|
||||||
|
def _end_georss_point(self):
    '''Pop the 'geometry' element, parse it as a point and save it as 'where'.'''
    text = self.pop('geometry')
    self._save_where(_parse_georss_point(text))
|
||||||
|
|
||||||
|
def _end_georss_line(self):
    '''Pop the 'geometry' element, parse it as a line and save it as 'where'.'''
    text = self.pop('geometry')
    self._save_where(_parse_georss_line(text))
|
||||||
|
|
||||||
|
def _end_georss_polygon(self):
    '''Pop the 'geometry' element, parse it as a polygon and save it as 'where'.'''
    self._save_where(_parse_georss_polygon(self.pop('geometry')))
|
||||||
|
|
||||||
|
def _end_georss_box(self):
    '''Pop the 'geometry' element, parse it as a box and save it as 'where'.'''
    text = self.pop('geometry')
    self._save_where(_parse_georss_box(text))
|
||||||
|
|
||||||
|
def _start_where(self, attrsD):
    '''Start handler for a where element: pushes 'where'; attrsD is unused.'''
    self.push('where', 0)

# the georss-prefixed start tag uses the same handler
_start_georss_where = _start_where
|
||||||
|
|
||||||
|
def _start_gml_point(self, attrsD):
    '''Start of a gml Point: mark the current geometry kind and push 'geometry'.'''
    # ingeometry is read back in _end_gml_poslist to choose the parser
    self.ingeometry = 'point'
    self.push('geometry', 0)
|
||||||
|
|
||||||
|
def _start_gml_linestring(self, attrsD):
    '''Start of a gml LineString: mark the geometry kind and push 'geometry'.'''
    # ingeometry is read back in _end_gml_poslist to choose the parser
    self.ingeometry = 'linestring'
    self.push('geometry', 0)
|
||||||
|
|
||||||
|
def _start_gml_polygon(self, attrsD):
    '''Start of a gml Polygon: pushes 'geometry' (popped again by _end_geom).'''
    self.push('geometry', 0)
|
||||||
|
|
||||||
|
def _start_gml_exterior(self, attrsD):
    '''Start of a gml exterior ring wrapper: pushes 'geometry'.'''
    self.push('geometry', 0)
|
||||||
|
|
||||||
|
def _start_gml_linearring(self, attrsD):
    '''Start of a gml LinearRing: mark the geometry as a polygon and push 'geometry'.'''
    # a following posList is parsed as a polygon ring (see _end_gml_poslist)
    self.ingeometry = 'polygon'
    self.push('geometry', 0)
|
||||||
|
|
||||||
|
def _start_gml_pos(self, attrsD):
    '''Start of a gml pos element: pushes 'pos'; attrsD is unused.'''
    self.push('pos', 0)
|
||||||
|
|
||||||
|
def _end_gml_pos(self):
    '''Pop the 'pos' element, parse it as a point and save it as 'where'.'''
    self._save_where(_parse_georss_point(self.pop('pos')))
|
||||||
|
|
||||||
|
def _start_gml_poslist(self, attrsD):
    '''Start of a gml posList element: pushes 'pos'; attrsD is unused.'''
    self.push('pos', 0)
|
||||||
|
|
||||||
|
def _end_gml_poslist(self):
    '''Pop the 'pos' element and save it, parsed per the enclosing geometry kind.

    self.ingeometry (set by the gml start handlers) selects the parser;
    _parse_poslist raises ValueError for kinds it does not support.
    '''
    text = self.pop('pos')
    self._save_where(_parse_poslist(text, self.ingeometry))
|
||||||
|
|
||||||
|
def _end_geom(self):
    '''Shared end handler for the gml geometry elements.

    Resets the geometry-kind marker and pops the 'geometry' element that
    the matching start handler pushed.
    '''
    self.ingeometry = 0
    self.pop('geometry')

_end_gml_point = _end_geom
_end_gml_linestring = _end_geom
_end_gml_linearring = _end_geom
_end_gml_exterior = _end_geom
_end_gml_polygon = _end_geom
|
||||||
|
|
||||||
|
def _end_where(self):
    '''End handler for a where element: pops 'where'.'''
    self.pop('where')

# the georss-prefixed end tag uses the same handler
_end_georss_where = _end_where
|
||||||
|
|
||||||
|
# end geospatial
|
||||||
|
|
||||||
def _start_cc_license(self, attrsD):
|
def _start_cc_license(self, attrsD):
|
||||||
context = self._getContext()
|
context = self._getContext()
|
||||||
value = self._getAttribute(attrsD, 'rdf:resource')
|
value = self._getAttribute(attrsD, 'rdf:resource')
|
||||||
@ -3336,7 +3425,77 @@ def _stripDoctype(data):
|
|||||||
data = doctype_pattern.sub(replacement, head) + data
|
data = doctype_pattern.sub(replacement, head) + data
|
||||||
|
|
||||||
return version, data, dict(replacement and safe_pattern.findall(replacement))
|
return version, data, dict(replacement and safe_pattern.findall(replacement))
|
||||||
|
|
||||||
|
# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
|
||||||
|
# keys, or None in the case of a parsing error
|
||||||
|
|
||||||
|
def _parse_poslist(value, geom_type):
|
||||||
|
if geom_type == 'linestring':
|
||||||
|
return _parse_georss_line(value)
|
||||||
|
elif geom_type == 'polygon':
|
||||||
|
ring = _parse_georss_line(value)
|
||||||
|
return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
|
||||||
|
else:
|
||||||
|
raise ValueError, "unsupported geometry type: %s" % geom_type
|
||||||
|
|
||||||
|
# Point coordinates are a 2-tuple (lon, lat)
|
||||||
|
def _parse_georss_point(value):
|
||||||
|
try:
|
||||||
|
lat, lon = value.replace(',', ' ').split()
|
||||||
|
return {'type': 'Point', 'coordinates': (float(lon), float(lat))}
|
||||||
|
except Exception, e:
|
||||||
|
if _debug:
|
||||||
|
sys.stderr.write('_parse_georss_point raised %s\n' % (handler.__name__, repr(e)))
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Line coordinates are a tuple of 2-tuples ((lon0, lat0), ... (lonN, latN))
|
||||||
|
def _parse_georss_line(value):
|
||||||
|
try:
|
||||||
|
latlons = value.replace(',', ' ').split()
|
||||||
|
coords = []
|
||||||
|
for i in range(0, len(latlons), 2):
|
||||||
|
lat = float(latlons[i])
|
||||||
|
lon = float(latlons[i+1])
|
||||||
|
coords.append((lon, lat))
|
||||||
|
return {'type': 'LineString', 'coordinates': tuple(coords)}
|
||||||
|
except Exception, e:
|
||||||
|
if _debug:
|
||||||
|
sys.stderr.write('_parse_georss_line raised %s\n' % repr(e))
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Polygon coordinates are a tuple of closed LineString tuples. The first item
|
||||||
|
# in the tuple is the exterior ring. Subsequent items are interior rings, but
|
||||||
|
# georss:polygon elements usually have no interior rings.
|
||||||
|
def _parse_georss_polygon(value):
|
||||||
|
try:
|
||||||
|
latlons = value.replace(',', ' ').split()
|
||||||
|
coords = []
|
||||||
|
for i in range(0, len(latlons), 2):
|
||||||
|
lat = float(latlons[i])
|
||||||
|
lon = float(latlons[i+1])
|
||||||
|
coords.append((lon, lat))
|
||||||
|
return {'type': 'Polygon', 'coordinates': (tuple(coords),)}
|
||||||
|
except Exception, e:
|
||||||
|
if _debug:
|
||||||
|
sys.stderr.write('_parse_georss_polygon raised %s\n' % repr(e))
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Box coordinates are a 2-tuple of 2-tuples ((lon_ll, lat_ll), (lon_ur, lat_ur))
|
||||||
|
def _parse_georss_box(value):
|
||||||
|
try:
|
||||||
|
vals = [float(x) for x in value.replace(',', ' ').split()]
|
||||||
|
return {'type': 'Box', 'coordinates': ((vals[1], vals[0]), (vals[3], vals[2]))}
|
||||||
|
except Exception, e:
|
||||||
|
if _debug:
|
||||||
|
sys.stderr.write('_parse_georss_box raised %s\n' % repr(e))
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
# end geospatial parsers
|
||||||
|
|
||||||
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]):
|
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]):
|
||||||
'''Parse a feed from a URL, file, stream, or string'''
|
'''Parse a feed from a URL, file, stream, or string'''
|
||||||
result = FeedParserDict()
|
result = FeedParserDict()
|
||||||
|
Loading…
Reference in New Issue
Block a user