diff --git a/planet/__init__.py b/planet/__init__.py index baeb991..444b30b 100644 --- a/planet/__init__.py +++ b/planet/__init__.py @@ -52,91 +52,3 @@ def setTimeout(timeout): logger.info("Socket timeout set to %d seconds", timeout) else: logger.error("Unable to set timeout to %d seconds", timeout) - -def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True): - global logger - try: - - import urllib2, StringIO - from planet.spider import filename - - # list cache file name - cache_filename = filename(config.cache_lists_directory(), list) - - # retrieve list options (e.g., etag, last-modified) from cache - options = {} - - # add original options - for key in orig_config.options(list): - options[key] = orig_config.get(list, key) - - try: - if use_cache: - cached_config = ConfigParser() - cached_config.read(cache_filename) - for option in cached_config.options(list): - options[option] = cached_config.get(list,option) - except: - pass - - cached_config = ConfigParser() - cached_config.add_section(list) - for key, value in options.items(): - cached_config.set(list, key, value) - - # read list - curdir=getattr(os.path, 'curdir', '.') - if sys.platform.find('win') < 0: - base = urljoin('file:', os.path.abspath(curdir)) - else: - path = os.path.abspath(os.path.curdir) - base = urljoin('file:///', path.replace(':','|').replace('\\','/')) - - request = urllib2.Request(urljoin(base + '/', list)) - if options.has_key("etag"): - request.add_header('If-None-Match', options['etag']) - if options.has_key("last-modified"): - request.add_header('If-Modified-Since', - options['last-modified']) - response = urllib2.urlopen(request) - if response.headers.has_key('etag'): - cached_config.set(list, 'etag', response.headers['etag']) - if response.headers.has_key('last-modified'): - cached_config.set(list, 'last-modified', - response.headers['last-modified']) - - # convert to config.ini - data = StringIO.StringIO(response.read()) - - if callback: callback(data, cached_config) - - # write to cache - if use_cache: - cache = open(cache_filename, 'w') - cached_config.write(cache) - cache.close() - - # re-parse and proceed - logger.debug("Using %s readinglist", list) - if re_read: - if use_cache: - orig_config.read(cache_filename) - else: - cdata = StringIO.StringIO() - cached_config.write(cdata) - cdata.seek(0) - orig_config.readfp(cdata) - except: - try: - if re_read: - if use_cache: - orig_config.read(cache_filename) - else: - cdata = StringIO.StringIO() - cached_config.write(cdata) - cdata.seek(0) - orig_config.readfp(cdata) - logger.info("Using cached %s readinglist", list) - except: - logger.exception("Unable to read %s readinglist", list) - diff --git a/planet/config.py b/planet/config.py index 296997c..cd6a997 100644 --- a/planet/config.py +++ b/planet/config.py @@ -182,7 +182,96 @@ def load(config_file): raise Exception for list in reading_lists: - planet.downloadReadingList(list, parser, data2config) + downloadReadingList(list, parser, data2config) + +def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True): + from planet import logger + import config + try: + + import urllib2, StringIO + from planet.spider import filename + + # list cache file name + cache_filename = filename(config.cache_lists_directory(), list) + + # retrieve list options (e.g., etag, last-modified) from cache + options = {} + + # add original options + for key in orig_config.options(list): + options[key] = orig_config.get(list, key) + + try: + if use_cache: + cached_config = ConfigParser() + cached_config.read(cache_filename) + for option in cached_config.options(list): + options[option] = cached_config.get(list,option) + except: + pass + + cached_config = ConfigParser() + cached_config.add_section(list) + for key, value in options.items(): + cached_config.set(list, key, value) + + # read list + curdir=getattr(os.path, 'curdir', '.') + if sys.platform.find('win') < 0: + base = urljoin('file:', os.path.abspath(curdir)) + else: + path = os.path.abspath(os.path.curdir) + base = urljoin('file:///', path.replace(':','|').replace('\\','/')) + + request = urllib2.Request(urljoin(base + '/', list)) + if options.has_key("etag"): + request.add_header('If-None-Match', options['etag']) + if options.has_key("last-modified"): + request.add_header('If-Modified-Since', + options['last-modified']) + response = urllib2.urlopen(request) + if response.headers.has_key('etag'): + cached_config.set(list, 'etag', response.headers['etag']) + if response.headers.has_key('last-modified'): + cached_config.set(list, 'last-modified', + response.headers['last-modified']) + + # convert to config.ini + data = StringIO.StringIO(response.read()) + + if callback: callback(data, cached_config) + + # write to cache + if use_cache: + cache = open(cache_filename, 'w') + cached_config.write(cache) + cache.close() + + # re-parse and proceed + logger.debug("Using %s readinglist", list) + if re_read: + if use_cache: + orig_config.read(cache_filename) + else: + cdata = StringIO.StringIO() + cached_config.write(cdata) + cdata.seek(0) + orig_config.readfp(cdata) + except: + try: + if re_read: + if use_cache: + if not orig_config.read(cache_filename): raise Exception() + else: + cdata = StringIO.StringIO() + cached_config.write(cdata) + cdata.seek(0) + orig_config.readfp(cdata) + logger.info("Using cached %s readinglist", list) + except: + logger.exception("Unable to read %s readinglist", list) + def cache_sources_directory(): if parser.has_option('Planet', 'cache_sources_directory'): diff --git a/planet/foaf.py b/planet/foaf.py index 463a660..eb981d1 100644 --- a/planet/foaf.py +++ b/planet/foaf.py @@ -133,8 +133,8 @@ def foaf2config(rdf, config, subject=None): { 'content_type' : 'foaf', 'depth' : str(depth - 1) }) try: - import planet - planet.downloadReadingList(seeAlso, config, + from planet.config import downloadReadingList + downloadReadingList(seeAlso, config, lambda data, subconfig : friend2config(model, friend, seeAlso, subconfig, data), False) except: diff --git a/planet/reconstitute.py b/planet/reconstitute.py index 196d691..3149256 100644 --- a/planet/reconstitute.py +++ b/planet/reconstitute.py @@ -225,7 +225,7 @@ def reconstitute(feed, entry): author(xentry, 'contributor', contributor) xsource = xdoc.createElement('source') - source(xsource, entry.get('source', feed.feed), bozo, feed.version) + source(xsource, entry.get('source') or feed.feed, bozo, feed.version) xentry.appendChild(xsource) return xdoc diff --git a/tests/data/reconstitute/rsssource.xml b/tests/data/reconstitute/rsssource.xml new file mode 100644 index 0000000..28acaa5 --- /dev/null +++ b/tests/data/reconstitute/rsssource.xml @@ -0,0 +1,15 @@ + + + + + foo + + http://example.com/1 + org + + + +