From 88bdbe930e651141a8fb5aea7c8334cd924d3d59 Mon Sep 17 00:00:00 2001
From: Sam Ruby
Date: Sun, 3 Sep 2006 13:12:54 -0400
Subject: [PATCH] ETags for reading lists

---
 planet/__init__.py   |  4 ++--
 planet/config.py     | 40 ++++++++++++++++++++++++++++++++++------
 tests/test_rlists.py |  2 +-
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/planet/__init__.py b/planet/__init__.py
index 8b9b982..9babf19 100644
--- a/planet/__init__.py
+++ b/planet/__init__.py
@@ -39,12 +39,12 @@ def setTimeout(timeout):
     try:
         from planet import timeoutsocket
         timeoutsocket.setDefaultSocketTimeout(timeout)
-        logger.debug("Socket timeout set to %d seconds", timeout)
+        logger.info("Socket timeout set to %d seconds", timeout)
     except ImportError:
         import socket
         if hasattr(socket, 'setdefaulttimeout'):
             logger.debug("timeoutsocket not found, using python function")
             socket.setdefaulttimeout(timeout)
-            logger.debug("Socket timeout set to %d seconds", timeout)
+            logger.info("Socket timeout set to %d seconds", timeout)
         else:
             logger.error("Unable to set timeout to %d seconds", timeout)
diff --git a/planet/config.py b/planet/config.py
index 7ea0a00..7438598 100644
--- a/planet/config.py
+++ b/planet/config.py
@@ -28,6 +28,7 @@ Todo:
 
 import os, sys, re
 from ConfigParser import ConfigParser
+from urlparse import urljoin
 
 parser = ConfigParser()
 
@@ -169,16 +170,44 @@ def load(config_file):
     for list in reading_lists:
         cache_filename = filename(config.cache_lists_directory(), list)
         try:
-            import urllib, StringIO
+            import urllib2, StringIO
 
-            # read once to verify
-            data=StringIO.StringIO(urllib.urlopen(list).read())
+            # retrieve list options (e.g., etag, last-modified) from cache
+            options = {}
+            try:
+                cached_config = ConfigParser()
+                cached_config.read(cache_filename)
+                for option in cached_config.options(list):
+                    options[option] = cached_config.get(list,option)
+            except:
+                pass
             cached_config = ConfigParser()
+            cached_config.add_section(list)
+            for key, value in options.items():
+                cached_config.set(list, key, value)
+
+            # read list
+            base = urljoin('file:', os.path.abspath(os.path.curdir))
+            request = urllib2.Request(urljoin(base + '/', list))
+            if options.has_key("etag"):
+                request.add_header('If-None-Match', options['etag'])
+            if options.has_key("last-modified"):
+                request.add_header('If-Modified-Since',
+                    options['last-modified'])
+            response = urllib2.urlopen(request)
+            if response.headers.has_key('etag'):
+                cached_config.set(list, 'etag', response.headers['etag'])
+            if response.headers.has_key('last-modified'):
+                cached_config.set(list, 'last-modified',
+                    response.headers['last-modified'])
+
+            # convert to config.ini
+            data=StringIO.StringIO(response.read())
             if content_type(list).find('opml')>=0:
                 opml.opml2config(data, cached_config)
             elif content_type(list).find('foaf')>=0:
                 foaf.foaf2config(data, cached_config)
-            if not cached_config.sections(): raise Exception
+            if cached_config.sections() in [[], [list]]: raise Exception
 
             # write to cache
             cache = open(cache_filename, 'w')
@@ -196,7 +225,6 @@ def load(config_file):
         except:
             log.exception("Unable to read %s readinglist", list)
             continue
-    # planet.foaf.foaf2config(data, list, config)
 
 def cache_sources_directory():
     if parser.has_option('Planet', 'cache_sources_directory'):
@@ -217,7 +245,7 @@ def feed():
     for template_file in template_files:
         name = os.path.splitext(os.path.basename(template_file))[0]
         if name.find('atom')>=0 or name.find('rss')>=0:
-            return urlparse.urljoin(link(), name)
+            return urljoin(link(), name)
 
 def feedtype():
     if parser.has_option('Planet', 'feedtype'):
diff --git a/tests/test_rlists.py b/tests/test_rlists.py
index e1590b8..02285d5 100644
--- a/tests/test_rlists.py
+++ b/tests/test_rlists.py
@@ -45,5 +45,5 @@ class ReadingListTest(unittest.TestCase):
         feeds = [split(feed)[1] for feed in parser.sections()]
         feeds.sort()
 
-        self.assertEqual(['testfeed0.atom', 'testfeed1a.atom',
+        self.assertEqual(['opml.xml', 'testfeed0.atom', 'testfeed1a.atom',
             'testfeed2.atom', 'testfeed3.rss'], feeds)