Updates from Sam Ruby.
commit f940ab6af4
@@ -52,91 +52,3 @@ def setTimeout(timeout):
                 logger.info("Socket timeout set to %d seconds", timeout)
             else:
                 logger.error("Unable to set timeout to %d seconds", timeout)
-
-def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
-    global logger
-    try:
-
-        import urllib2, StringIO
-        from planet.spider import filename
-
-        # list cache file name
-        cache_filename = filename(config.cache_lists_directory(), list)
-
-        # retrieve list options (e.g., etag, last-modified) from cache
-        options = {}
-
-        # add original options
-        for key in orig_config.options(list):
-            options[key] = orig_config.get(list, key)
-
-        try:
-            if use_cache:
-                cached_config = ConfigParser()
-                cached_config.read(cache_filename)
-                for option in cached_config.options(list):
-                    options[option] = cached_config.get(list,option)
-        except:
-            pass
-
-        cached_config = ConfigParser()
-        cached_config.add_section(list)
-        for key, value in options.items():
-            cached_config.set(list, key, value)
-
-        # read list
-        curdir=getattr(os.path, 'curdir', '.')
-        if sys.platform.find('win') < 0:
-            base = urljoin('file:', os.path.abspath(curdir))
-        else:
-            path = os.path.abspath(os.path.curdir)
-            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
-
-        request = urllib2.Request(urljoin(base + '/', list))
-        if options.has_key("etag"):
-            request.add_header('If-None-Match', options['etag'])
-        if options.has_key("last-modified"):
-            request.add_header('If-Modified-Since',
-                options['last-modified'])
-        response = urllib2.urlopen(request)
-        if response.headers.has_key('etag'):
-            cached_config.set(list, 'etag', response.headers['etag'])
-        if response.headers.has_key('last-modified'):
-            cached_config.set(list, 'last-modified',
-                response.headers['last-modified'])
-
-        # convert to config.ini
-        data = StringIO.StringIO(response.read())
-
-        if callback: callback(data, cached_config)
-
-        # write to cache
-        if use_cache:
-            cache = open(cache_filename, 'w')
-            cached_config.write(cache)
-            cache.close()
-
-        # re-parse and proceed
-        logger.debug("Using %s readinglist", list)
-        if re_read:
-            if use_cache:
-                orig_config.read(cache_filename)
-            else:
-                cdata = StringIO.StringIO()
-                cached_config.write(cdata)
-                cdata.seek(0)
-                orig_config.readfp(cdata)
-    except:
-        try:
-            if re_read:
-                if use_cache:
-                    orig_config.read(cache_filename)
-                else:
-                    cdata = StringIO.StringIO()
-                    cached_config.write(cdata)
-                    cdata.seek(0)
-                    orig_config.readfp(cdata)
-            logger.info("Using cached %s readinglist", list)
-        except:
-            logger.exception("Unable to read %s readinglist", list)
@@ -182,7 +182,96 @@ def load(config_file):
             raise Exception

     for list in reading_lists:
-        planet.downloadReadingList(list, parser, data2config)
+        downloadReadingList(list, parser, data2config)
+
+def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
+    from planet import logger
+    import config
+    try:
+
+        import urllib2, StringIO
+        from planet.spider import filename
+
+        # list cache file name
+        cache_filename = filename(config.cache_lists_directory(), list)
+
+        # retrieve list options (e.g., etag, last-modified) from cache
+        options = {}
+
+        # add original options
+        for key in orig_config.options(list):
+            options[key] = orig_config.get(list, key)
+
+        try:
+            if use_cache:
+                cached_config = ConfigParser()
+                cached_config.read(cache_filename)
+                for option in cached_config.options(list):
+                    options[option] = cached_config.get(list,option)
+        except:
+            pass
+
+        cached_config = ConfigParser()
+        cached_config.add_section(list)
+        for key, value in options.items():
+            cached_config.set(list, key, value)
+
+        # read list
+        curdir=getattr(os.path, 'curdir', '.')
+        if sys.platform.find('win') < 0:
+            base = urljoin('file:', os.path.abspath(curdir))
+        else:
+            path = os.path.abspath(os.path.curdir)
+            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
+
+        request = urllib2.Request(urljoin(base + '/', list))
+        if options.has_key("etag"):
+            request.add_header('If-None-Match', options['etag'])
+        if options.has_key("last-modified"):
+            request.add_header('If-Modified-Since',
+                options['last-modified'])
+        response = urllib2.urlopen(request)
+        if response.headers.has_key('etag'):
+            cached_config.set(list, 'etag', response.headers['etag'])
+        if response.headers.has_key('last-modified'):
+            cached_config.set(list, 'last-modified',
+                response.headers['last-modified'])
+
+        # convert to config.ini
+        data = StringIO.StringIO(response.read())
+
+        if callback: callback(data, cached_config)
+
+        # write to cache
+        if use_cache:
+            cache = open(cache_filename, 'w')
+            cached_config.write(cache)
+            cache.close()
+
+        # re-parse and proceed
+        logger.debug("Using %s readinglist", list)
+        if re_read:
+            if use_cache:
+                orig_config.read(cache_filename)
+            else:
+                cdata = StringIO.StringIO()
+                cached_config.write(cdata)
+                cdata.seek(0)
+                orig_config.readfp(cdata)
+    except:
+        try:
+            if re_read:
+                if use_cache:
+                    if not orig_config.read(cache_filename): raise Exception()
+                else:
+                    cdata = StringIO.StringIO()
+                    cached_config.write(cdata)
+                    cdata.seek(0)
+                    orig_config.readfp(cdata)
+            logger.info("Using cached %s readinglist", list)
+        except:
+            logger.exception("Unable to read %s readinglist", list)

 def cache_sources_directory():
     if parser.has_option('Planet', 'cache_sources_directory'):
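The function above drives its cache with HTTP conditional GET: etag and last-modified values from a previous fetch are stored in a per-list ConfigParser file and replayed as If-None-Match and If-Modified-Since headers, so an unchanged list falls straight into the except branch that re-reads the cached copy. A minimal usage sketch, assuming a hypothetical OPML URL (the callback signature matches the data2config callback used in load() above):

    from ConfigParser import ConfigParser
    from planet.config import downloadReadingList

    list = 'http://example.com/blogroll.opml'  # hypothetical reading list
    parser = ConfigParser()
    parser.add_section(list)  # downloadReadingList reads options from this section

    def callback(data, cached_config):
        # convert the fetched document into config.ini-style sections here,
        # as data2config does in load() above
        pass

    downloadReadingList(list, parser, callback)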
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
 Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
 """

-__version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
+__version__ = "4.2-pre-" + "$Revision: 1.142 $"[11:16] + "-cvs"
 __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
@@ -2640,7 +2640,7 @@ def registerDateHandler(func):
 # 0301-04-01), so we use templates instead.
 # Please note the order in templates is significant because we need a
 # greedy match.
-_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-MM', 'YYYY-?OOO',
+_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO',
                  'YY-?MM-?DD', 'YY-?OOO', 'YYYY',
                  '-YY-?MM', '-OOO', '-YY',
                  '--MM-?DD', '--MM',
@@ -133,8 +133,8 @@ def foaf2config(rdf, config, subject=None):
                 { 'content_type' : 'foaf',
                   'depth' : str(depth - 1) })
             try:
-                import planet
-                planet.downloadReadingList(seeAlso, config,
+                from planet.config import downloadReadingList
+                downloadReadingList(seeAlso, config,
                     lambda data, subconfig : friend2config(model, friend, seeAlso, subconfig, data),
                     False)
             except:
@@ -202,6 +202,9 @@ def reconstitute(feed, entry):
     xentry=xdoc.documentElement
     xentry.setAttribute('xmlns:planet',planet.xmlns)

+    if entry.has_key('language'):
+        xentry.setAttribute('xml:lang', entry.language)
+
     id(xentry, entry)
     links(xentry, entry)

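The new xml:lang attribute propagates an entry-level language, when feedparser supplies one, onto the reconstituted Atom entry. In miniature, with an illustrative dict standing in for the parsed entry:

    from xml.dom import minidom

    xdoc = minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>')
    xentry = xdoc.documentElement
    entry = {'language': 'en-us'}  # stand-in for a feedparser entry
    if entry.has_key('language'):
        xentry.setAttribute('xml:lang', entry['language'])
    print xdoc.toxml()  # <entry xmlns="..." xml:lang="en-us"/>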
@@ -225,7 +228,7 @@ def reconstitute(feed, entry):
         author(xentry, 'contributor', contributor)

     xsource = xdoc.createElement('source')
-    source(xsource, entry.get('source', feed.feed), bozo, feed.version)
+    source(xsource, entry.get('source') or feed.feed, bozo, feed.version)
     xentry.appendChild(xsource)

     return xdoc
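The change from a .get() default to an or guard matters when an entry carries a source key whose value is present but empty: a default is used only when the key is absent, while or also falls back on an empty mapping. A small illustration with made-up values:

    entry = {'source': {}}  # present but empty source
    feed_feed = {'title': 'planet'}

    print entry.get('source', feed_feed)    # {}: key exists, default unused
    print entry.get('source') or feed_feed  # {'title': 'planet'}: falls back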
@@ -34,6 +34,16 @@ def filename(directory, filename):
     filename = re_initial_cruft.sub("", filename)
     filename = re_final_cruft.sub("", filename)

+    # limit length of filename
+    if len(filename)>250:
+        parts=filename.split(',')
+        for i in range(len(parts),0,-1):
+            if len(','.join(parts[:i])) < 220:
+                import md5
+                filename = ','.join(parts[:i]) + ',' + \
+                    md5.new(','.join(parts[i:])).hexdigest()
+                break
+
     return os.path.join(directory, filename)

 def write(xdoc, out):
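Cache filenames are derived from URLs and can exceed filesystem name limits; the block above keeps as many comma-separated components as fit under 220 characters and replaces the dropped tail with its md5 digest, so distinct long names stay distinct. The same rule as a standalone sketch (the sample name is fabricated):

    import md5

    name = ','.join(['component%02d' % i for i in range(40)])  # ~480 chars
    if len(name) > 250:
        parts = name.split(',')
        for i in range(len(parts), 0, -1):
            if len(','.join(parts[:i])) < 220:
                # keep a readable prefix, hash the remainder
                name = ','.join(parts[:i]) + ',' + \
                    md5.new(','.join(parts[i:])).hexdigest()
                break
    print len(name)  # bounded: prefix under 220 plus a 32-character digest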
@@ -17,7 +17,7 @@ def splice():
     dir.sort()
     dir.reverse()

-    items=max([config.items_per_page(templ)
+    max_items=max([config.items_per_page(templ)
         for templ in config.template_files() or ['Planet']])

     doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
@@ -49,25 +49,40 @@ def splice():
     link.setAttribute('href', config.link())
     feed.appendChild(link)

-    # insert entry information
-    for mtime,file in dir[:items]:
-        try:
-            entry=minidom.parse(file)
-            feed.appendChild(entry.documentElement)
-        except:
-            log.error("Error parsing %s", file)
-
     # insert subscription information
+    sub_ids = []
     feed.setAttribute('xmlns:planet',planet.xmlns)
     sources = config.cache_sources_directory()
     for sub in config.subscriptions():
         data=feedparser.parse(filename(sources,sub))
+        if data.feed.has_key('id'): sub_ids.append(data.feed.id)
         if not data.feed: continue
         xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
             xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
         reconstitute.source(xdoc.documentElement, data.feed, None, None)
         feed.appendChild(xdoc.documentElement)

+    # insert entry information
+    items = 0
+    for mtime,file in dir:
+        try:
+            entry=minidom.parse(file)
+
+            # verify that this entry is currently subscribed to
+            entry.normalize()
+            sources = entry.getElementsByTagName('source')
+            if sources:
+                ids = sources[0].getElementsByTagName('id')
+                if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
+                    continue
+
+            # add entry to feed
+            feed.appendChild(entry.documentElement)
+            items = items + 1
+            if items >= max_items: break
+        except:
+            log.error("Error parsing %s", file)
+
     return doc

 def apply(doc):
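Entries are now spliced only after the subscription pass has collected sub_ids, the ids of currently subscribed feeds, so cached entries whose source feed has been unsubscribed are skipped; this is what test_splice_unsub below exercises. The check in isolation, with invented feed ids:

    from xml.dom import minidom

    sub_ids = ['tag:example.com,2006:feed1']  # ids of subscribed feeds
    entry = minidom.parseString(
        '<entry xmlns="http://www.w3.org/2005/Atom">'
        '<source><id>tag:example.com,2006:feed2</id></source></entry>')
    entry.normalize()
    sources = entry.getElementsByTagName('source')
    if sources:
        ids = sources[0].getElementsByTagName('id')
        if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
            print 'skipped: source feed no longer subscribed'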
@@ -26,7 +26,12 @@ import planet
 planet.getLogger("WARNING")

 # load all of the tests into a suite
-suite = unittest.TestLoader().loadTestsFromNames(modules)
+try:
+    suite = unittest.TestLoader().loadTestsFromNames(modules)
+except Exception, exception:
+    # attempt to produce a more specific message
+    for module in modules: __import__(module)
+    raise

 # run test suite
 unittest.TextTestRunner().run(suite)
tests/data/reconstitute/dc_lang.xml (new file, 14 lines)
@@ -0,0 +1,14 @@
+<!--
+Description: title value
+Expect: title_detail.language == 'en-us'
+-->
+
+<rdf:RDF
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+  xmlns="http://purl.org/rss/1.0/">
+  <item>
+    <title>foo</title>
+    <dc:language>en-us</dc:language>
+  </item>
+</rdf:RDF>
tests/data/reconstitute/rsssource.xml (new file, 15 lines)
@@ -0,0 +1,15 @@
+<!--
+Description: source element
+Expect: source.title == 'foo'
+-->
+
+<rss version="2.0">
+  <channel>
+    <title>foo</title>
+    <item>
+      <guid>http://example.com/1</guid>
+      <source url="http://www.example.org">org</source>
+    </item>
+  </channel>
+</rss>
+
@@ -16,3 +16,11 @@ class SpliceTest(unittest.TestCase):

         self.assertEqual('test planet',
             doc.getElementsByTagName('title')[0].firstChild.nodeValue)
+
+    def test_splice_unsub(self):
+        config.load(configfile)
+        config.parser.remove_section('tests/data/spider/testfeed2.atom')
+        doc = splice()
+        self.assertEqual(8,len(doc.getElementsByTagName('entry')))
+        self.assertEqual(3,len(doc.getElementsByTagName('planet:source')))
+        self.assertEqual(11,len(doc.getElementsByTagName('planet:name')))