Updates from Sam Ruby.
commit f940ab6af4
@@ -52,91 +52,3 @@ def setTimeout(timeout):
                 logger.info("Socket timeout set to %d seconds", timeout)
             else:
                 logger.error("Unable to set timeout to %d seconds", timeout)
-
-def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
-    global logger
-    try:
-
-        import urllib2, StringIO
-        from planet.spider import filename
-
-        # list cache file name
-        cache_filename = filename(config.cache_lists_directory(), list)
-
-        # retrieve list options (e.g., etag, last-modified) from cache
-        options = {}
-
-        # add original options
-        for key in orig_config.options(list):
-            options[key] = orig_config.get(list, key)
-
-        try:
-            if use_cache:
-                cached_config = ConfigParser()
-                cached_config.read(cache_filename)
-                for option in cached_config.options(list):
-                    options[option] = cached_config.get(list,option)
-        except:
-            pass
-
-        cached_config = ConfigParser()
-        cached_config.add_section(list)
-        for key, value in options.items():
-            cached_config.set(list, key, value)
-
-        # read list
-        curdir=getattr(os.path, 'curdir', '.')
-        if sys.platform.find('win') < 0:
-            base = urljoin('file:', os.path.abspath(curdir))
-        else:
-            path = os.path.abspath(os.path.curdir)
-            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
-
-        request = urllib2.Request(urljoin(base + '/', list))
-        if options.has_key("etag"):
-            request.add_header('If-None-Match', options['etag'])
-        if options.has_key("last-modified"):
-            request.add_header('If-Modified-Since',
-                options['last-modified'])
-        response = urllib2.urlopen(request)
-        if response.headers.has_key('etag'):
-            cached_config.set(list, 'etag', response.headers['etag'])
-        if response.headers.has_key('last-modified'):
-            cached_config.set(list, 'last-modified',
-                response.headers['last-modified'])
-
-        # convert to config.ini
-        data = StringIO.StringIO(response.read())
-
-        if callback: callback(data, cached_config)
-
-        # write to cache
-        if use_cache:
-            cache = open(cache_filename, 'w')
-            cached_config.write(cache)
-            cache.close()
-
-        # re-parse and proceed
-        logger.debug("Using %s readinglist", list)
-        if re_read:
-            if use_cache:
-                orig_config.read(cache_filename)
-            else:
-                cdata = StringIO.StringIO()
-                cached_config.write(cdata)
-                cdata.seek(0)
-                orig_config.readfp(cdata)
-    except:
-        try:
-            if re_read:
-                if use_cache:
-                    orig_config.read(cache_filename)
-                else:
-                    cdata = StringIO.StringIO()
-                    cached_config.write(cdata)
-                    cdata.seek(0)
-                    orig_config.readfp(cdata)
-            logger.info("Using cached %s readinglist", list)
-        except:
-            logger.exception("Unable to read %s readinglist", list)
@@ -182,7 +182,96 @@ def load(config_file):
             raise Exception

     for list in reading_lists:
-        planet.downloadReadingList(list, parser, data2config)
+        downloadReadingList(list, parser, data2config)
+
+def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
+    from planet import logger
+    import config
+    try:
+
+        import urllib2, StringIO
+        from planet.spider import filename
+
+        # list cache file name
+        cache_filename = filename(config.cache_lists_directory(), list)
+
+        # retrieve list options (e.g., etag, last-modified) from cache
+        options = {}
+
+        # add original options
+        for key in orig_config.options(list):
+            options[key] = orig_config.get(list, key)
+
+        try:
+            if use_cache:
+                cached_config = ConfigParser()
+                cached_config.read(cache_filename)
+                for option in cached_config.options(list):
+                    options[option] = cached_config.get(list,option)
+        except:
+            pass
+
+        cached_config = ConfigParser()
+        cached_config.add_section(list)
+        for key, value in options.items():
+            cached_config.set(list, key, value)
+
+        # read list
+        curdir=getattr(os.path, 'curdir', '.')
+        if sys.platform.find('win') < 0:
+            base = urljoin('file:', os.path.abspath(curdir))
+        else:
+            path = os.path.abspath(os.path.curdir)
+            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
+
+        request = urllib2.Request(urljoin(base + '/', list))
+        if options.has_key("etag"):
+            request.add_header('If-None-Match', options['etag'])
+        if options.has_key("last-modified"):
+            request.add_header('If-Modified-Since',
+                options['last-modified'])
+        response = urllib2.urlopen(request)
+        if response.headers.has_key('etag'):
+            cached_config.set(list, 'etag', response.headers['etag'])
+        if response.headers.has_key('last-modified'):
+            cached_config.set(list, 'last-modified',
+                response.headers['last-modified'])
+
+        # convert to config.ini
+        data = StringIO.StringIO(response.read())
+
+        if callback: callback(data, cached_config)
+
+        # write to cache
+        if use_cache:
+            cache = open(cache_filename, 'w')
+            cached_config.write(cache)
+            cache.close()
+
+        # re-parse and proceed
+        logger.debug("Using %s readinglist", list)
+        if re_read:
+            if use_cache:
+                orig_config.read(cache_filename)
+            else:
+                cdata = StringIO.StringIO()
+                cached_config.write(cdata)
+                cdata.seek(0)
+                orig_config.readfp(cdata)
+    except:
+        try:
+            if re_read:
+                if use_cache:
+                    if not orig_config.read(cache_filename): raise Exception()
+                else:
+                    cdata = StringIO.StringIO()
+                    cached_config.write(cdata)
+                    cdata.seek(0)
+                    orig_config.readfp(cdata)
+            logger.info("Using cached %s readinglist", list)
+        except:
+            logger.exception("Unable to read %s readinglist", list)

 def cache_sources_directory():
     if parser.has_option('Planet', 'cache_sources_directory'):
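The function above drives its cache with HTTP conditional GET: etag and last-modified values from a previous fetch are stored in a per-list ConfigParser file and replayed as If-None-Match and If-Modified-Since headers, so an unchanged list falls straight into the except branch that re-reads the cached copy. A minimal usage sketch, assuming a hypothetical OPML URL (the callback signature matches the data2config callback used in load() above):

    from ConfigParser import ConfigParser
    from planet.config import downloadReadingList

    list = 'http://example.com/blogroll.opml'  # hypothetical reading list
    parser = ConfigParser()
    parser.add_section(list)  # downloadReadingList reads options from this section

    def callback(data, cached_config):
        # convert the fetched document into config.ini-style sections here,
        # as data2config does in load() above
        pass

    downloadReadingList(list, parser, callback)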
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
 Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
 """

-__version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
+__version__ = "4.2-pre-" + "$Revision: 1.142 $"[11:16] + "-cvs"
 __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
@@ -2640,7 +2640,7 @@ def registerDateHandler(func):
 # 0301-04-01), so we use templates instead.
 # Please note the order in templates is significant because we need a
 # greedy match.
-_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-MM', 'YYYY-?OOO',
+_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO',
                  'YY-?MM-?DD', 'YY-?OOO', 'YYYY',
                  '-YY-?MM', '-OOO', '-YY',
                  '--MM-?DD', '--MM',
@@ -133,8 +133,8 @@ def foaf2config(rdf, config, subject=None):
                 { 'content_type' : 'foaf',
                   'depth' : str(depth - 1) })
             try:
-                import planet
-                planet.downloadReadingList(seeAlso, config,
+                from planet.config import downloadReadingList
+                downloadReadingList(seeAlso, config,
                     lambda data, subconfig : friend2config(model, friend, seeAlso, subconfig, data),
                     False)
             except:
@@ -202,6 +202,9 @@ def reconstitute(feed, entry):
     xentry=xdoc.documentElement
     xentry.setAttribute('xmlns:planet',planet.xmlns)

+    if entry.has_key('language'):
+        xentry.setAttribute('xml:lang', entry.language)
+
     id(xentry, entry)
     links(xentry, entry)

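The new xml:lang attribute propagates an entry-level language, when feedparser supplies one, onto the reconstituted Atom entry. In miniature, with an illustrative dict standing in for the parsed entry:

    from xml.dom import minidom

    xdoc = minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>')
    xentry = xdoc.documentElement
    entry = {'language': 'en-us'}  # stand-in for a feedparser entry
    if entry.has_key('language'):
        xentry.setAttribute('xml:lang', entry['language'])
    print xdoc.toxml()  # <entry xmlns="..." xml:lang="en-us"/>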
@@ -225,7 +228,7 @@ def reconstitute(feed, entry):
         author(xentry, 'contributor', contributor)

     xsource = xdoc.createElement('source')
-    source(xsource, entry.get('source', feed.feed), bozo, feed.version)
+    source(xsource, entry.get('source') or feed.feed, bozo, feed.version)
     xentry.appendChild(xsource)

     return xdoc
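The change from a .get() default to an or guard matters when an entry carries a source key whose value is present but empty: a default is used only when the key is absent, while or also falls back on an empty mapping. A small illustration with made-up values:

    entry = {'source': {}}  # present but empty source
    feed_feed = {'title': 'planet'}

    print entry.get('source', feed_feed)    # {}: key exists, default unused
    print entry.get('source') or feed_feed  # {'title': 'planet'}: falls back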
@@ -34,6 +34,16 @@ def filename(directory, filename):
     filename = re_initial_cruft.sub("", filename)
     filename = re_final_cruft.sub("", filename)

+    # limit length of filename
+    if len(filename)>250:
+        parts=filename.split(',')
+        for i in range(len(parts),0,-1):
+            if len(','.join(parts[:i])) < 220:
+                import md5
+                filename = ','.join(parts[:i]) + ',' + \
+                    md5.new(','.join(parts[i:])).hexdigest()
+                break
+
     return os.path.join(directory, filename)

 def write(xdoc, out):
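Cache filenames are derived from URLs and can exceed filesystem name limits; the block above keeps as many comma-separated components as fit under 220 characters and replaces the dropped tail with its md5 digest, so distinct long names stay distinct. The same rule as a standalone sketch (the sample name is fabricated):

    import md5

    name = ','.join(['component%02d' % i for i in range(40)])  # ~480 chars
    if len(name) > 250:
        parts = name.split(',')
        for i in range(len(parts), 0, -1):
            if len(','.join(parts[:i])) < 220:
                # keep a readable prefix, hash the remainder
                name = ','.join(parts[:i]) + ',' + \
                    md5.new(','.join(parts[i:])).hexdigest()
                break
    print len(name)  # bounded: prefix under 220 plus a 32-character digest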
@@ -17,7 +17,7 @@ def splice():
     dir.sort()
     dir.reverse()

-    items=max([config.items_per_page(templ)
+    max_items=max([config.items_per_page(templ)
         for templ in config.template_files() or ['Planet']])

     doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
@@ -49,25 +49,40 @@ def splice():
     link.setAttribute('href', config.link())
     feed.appendChild(link)

-    # insert entry information
-    for mtime,file in dir[:items]:
-        try:
-            entry=minidom.parse(file)
-            feed.appendChild(entry.documentElement)
-        except:
-            log.error("Error parsing %s", file)
-
     # insert subscription information
+    sub_ids = []
     feed.setAttribute('xmlns:planet',planet.xmlns)
     sources = config.cache_sources_directory()
     for sub in config.subscriptions():
         data=feedparser.parse(filename(sources,sub))
+        if data.feed.has_key('id'): sub_ids.append(data.feed.id)
         if not data.feed: continue
         xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
             xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
         reconstitute.source(xdoc.documentElement, data.feed, None, None)
         feed.appendChild(xdoc.documentElement)

+    # insert entry information
+    items = 0
+    for mtime,file in dir:
+        try:
+            entry=minidom.parse(file)
+
+            # verify that this entry is currently subscribed to
+            entry.normalize()
+            sources = entry.getElementsByTagName('source')
+            if sources:
+                ids = sources[0].getElementsByTagName('id')
+                if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
+                    continue
+
+            # add entry to feed
+            feed.appendChild(entry.documentElement)
+            items = items + 1
+            if items >= max_items: break
+        except:
+            log.error("Error parsing %s", file)
+
     return doc

 def apply(doc):
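Entries are now spliced only after the subscription pass has collected sub_ids, the ids of currently subscribed feeds, so cached entries whose source feed has been unsubscribed are skipped; this is what test_splice_unsub below exercises. The check in isolation, with invented feed ids:

    from xml.dom import minidom

    sub_ids = ['tag:example.com,2006:feed1']  # ids of subscribed feeds
    entry = minidom.parseString(
        '<entry xmlns="http://www.w3.org/2005/Atom">'
        '<source><id>tag:example.com,2006:feed2</id></source></entry>')
    entry.normalize()
    sources = entry.getElementsByTagName('source')
    if sources:
        ids = sources[0].getElementsByTagName('id')
        if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
            print 'skipped: source feed no longer subscribed'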
@@ -26,7 +26,12 @@ import planet
 planet.getLogger("WARNING")

 # load all of the tests into a suite
-suite = unittest.TestLoader().loadTestsFromNames(modules)
+try:
+    suite = unittest.TestLoader().loadTestsFromNames(modules)
+except Exception, exception:
+    # attempt to produce a more specific message
+    for module in modules: __import__(module)
+    raise

 # run test suite
 unittest.TextTestRunner().run(suite)
tests/data/reconstitute/dc_lang.xml (new file, 14 lines)
@@ -0,0 +1,14 @@
+<!--
+Description: title value
+Expect: title_detail.language == 'en-us'
+-->
+
+<rdf:RDF
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+  xmlns="http://purl.org/rss/1.0/">
+  <item>
+    <title>foo</title>
+    <dc:language>en-us</dc:language>
+  </item>
+</rdf:RDF>
tests/data/reconstitute/rsssource.xml (new file, 15 lines)
@@ -0,0 +1,15 @@
+<!--
+Description: source element
+Expect: source.title == 'foo'
+-->
+
+<rss version="2.0">
+  <channel>
+    <title>foo</title>
+    <item>
+      <guid>http://example.com/1</guid>
+      <source url="http://www.example.org">org</source>
+    </item>
+  </channel>
+</rss>
+
@@ -16,3 +16,11 @@ class SpliceTest(unittest.TestCase):

         self.assertEqual('test planet',
             doc.getElementsByTagName('title')[0].firstChild.nodeValue)
+
+    def test_splice_unsub(self):
+        config.load(configfile)
+        config.parser.remove_section('tests/data/spider/testfeed2.atom')
+        doc = splice()
+        self.assertEqual(8,len(doc.getElementsByTagName('entry')))
+        self.assertEqual(3,len(doc.getElementsByTagName('planet:source')))
+        self.assertEqual(11,len(doc.getElementsByTagName('planet:name')))