Still having problems with channel_name.
This commit is contained in:
parent
58bb4b6e05
commit
217e850e41
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
|
|||||||
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = "4.2-pre-" + "$Revision: 1.142 $"[11:16] + "-cvs"
|
__version__ = "4.2-pre-" + "$Revision: 1.145 $"[11:16] + "-cvs"
|
||||||
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
Redistribution and use in source and binary forms, with or without modification,
|
||||||
@ -218,6 +218,9 @@ class FeedParserDict(UserDict):
|
|||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
if key == 'category':
|
if key == 'category':
|
||||||
return UserDict.__getitem__(self, 'tags')[0]['term']
|
return UserDict.__getitem__(self, 'tags')[0]['term']
|
||||||
|
if key == 'enclosures':
|
||||||
|
norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel'])
|
||||||
|
return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure']
|
||||||
if key == 'categories':
|
if key == 'categories':
|
||||||
return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')]
|
return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')]
|
||||||
realkey = self.keymap.get(key, key)
|
realkey = self.keymap.get(key, key)
|
||||||
@ -1303,15 +1306,15 @@ class _FeedParserMixin:
|
|||||||
attrsD.setdefault('type', 'application/atom+xml')
|
attrsD.setdefault('type', 'application/atom+xml')
|
||||||
else:
|
else:
|
||||||
attrsD.setdefault('type', 'text/html')
|
attrsD.setdefault('type', 'text/html')
|
||||||
|
context = self._getContext()
|
||||||
attrsD = self._itsAnHrefDamnIt(attrsD)
|
attrsD = self._itsAnHrefDamnIt(attrsD)
|
||||||
if attrsD.has_key('href'):
|
if attrsD.has_key('href'):
|
||||||
attrsD['href'] = self.resolveURI(attrsD['href'])
|
attrsD['href'] = self.resolveURI(attrsD['href'])
|
||||||
|
if attrsD.get('rel')=='enclosure' and not context.get('id'):
|
||||||
|
context['id'] = attrsD.get('href')
|
||||||
expectingText = self.infeed or self.inentry or self.insource
|
expectingText = self.infeed or self.inentry or self.insource
|
||||||
context = self._getContext()
|
|
||||||
context.setdefault('links', [])
|
context.setdefault('links', [])
|
||||||
context['links'].append(FeedParserDict(attrsD))
|
context['links'].append(FeedParserDict(attrsD))
|
||||||
if attrsD['rel'] == 'enclosure':
|
|
||||||
self._start_enclosure(attrsD)
|
|
||||||
if attrsD.has_key('href'):
|
if attrsD.has_key('href'):
|
||||||
expectingText = 0
|
expectingText = 0
|
||||||
if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
|
if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
|
||||||
@ -1357,6 +1360,7 @@ class _FeedParserMixin:
|
|||||||
self._start_content(attrsD)
|
self._start_content(attrsD)
|
||||||
else:
|
else:
|
||||||
self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource)
|
self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource)
|
||||||
|
_start_dc_description = _start_description
|
||||||
|
|
||||||
def _start_abstract(self, attrsD):
|
def _start_abstract(self, attrsD):
|
||||||
self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)
|
self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)
|
||||||
@ -1368,6 +1372,7 @@ class _FeedParserMixin:
|
|||||||
value = self.popContent('description')
|
value = self.popContent('description')
|
||||||
self._summaryKey = None
|
self._summaryKey = None
|
||||||
_end_abstract = _end_description
|
_end_abstract = _end_description
|
||||||
|
_end_dc_description = _end_description
|
||||||
|
|
||||||
def _start_info(self, attrsD):
|
def _start_info(self, attrsD):
|
||||||
self.pushContent('info', attrsD, 'text/plain', 1)
|
self.pushContent('info', attrsD, 'text/plain', 1)
|
||||||
@ -1427,7 +1432,8 @@ class _FeedParserMixin:
|
|||||||
def _start_enclosure(self, attrsD):
|
def _start_enclosure(self, attrsD):
|
||||||
attrsD = self._itsAnHrefDamnIt(attrsD)
|
attrsD = self._itsAnHrefDamnIt(attrsD)
|
||||||
context = self._getContext()
|
context = self._getContext()
|
||||||
context.setdefault('enclosures', []).append(FeedParserDict(attrsD))
|
attrsD['rel']='enclosure'
|
||||||
|
context.setdefault('links', []).append(FeedParserDict(attrsD))
|
||||||
href = attrsD.get('href')
|
href = attrsD.get('href')
|
||||||
if href and not context.get('id'):
|
if href and not context.get('id'):
|
||||||
context['id'] = href
|
context['id'] = href
|
||||||
|
@ -140,7 +140,7 @@ def spiderFeed(feed, only_if_new=0, content=None, resp_headers=None):
|
|||||||
|
|
||||||
# read feed itself
|
# read feed itself
|
||||||
if content:
|
if content:
|
||||||
data = feedparser.parse(content, resp_headers)
|
data = feedparser.parse(content, resp_headers=resp_headers)
|
||||||
else:
|
else:
|
||||||
modified = None
|
modified = None
|
||||||
try:
|
try:
|
||||||
@ -338,8 +338,12 @@ def spiderPlanet(only_if_new = False):
|
|||||||
work_queue = Queue()
|
work_queue = Queue()
|
||||||
awaiting_parsing = Queue()
|
awaiting_parsing = Queue()
|
||||||
|
|
||||||
|
http_cache = config.http_cache_directory()
|
||||||
|
if not os.path.exists(http_cache):
|
||||||
|
os.makedirs(http_cache, 0700)
|
||||||
|
|
||||||
def _spider_proc(thread_index):
|
def _spider_proc(thread_index):
|
||||||
h = httplib2.Http(config.http_cache_directory())
|
h = httplib2.Http(http_cache)
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
# The non-blocking get will throw an exception when the queue
|
# The non-blocking get will throw an exception when the queue
|
||||||
|
Loading…
x
Reference in New Issue
Block a user