Updates from Sam Ruby.

2006-09-24 15:43:19 -05:00 · 2006-09-24 15:43:19 -05:00 · 2deb9bcf3d
commit 2deb9bcf3d
parent c5896465b0 4b0cd8d5d9
19 changed files with 276 additions and 142 deletions
--- a/1
+++ b/1
@ -3,6 +3,7 @@ Mary Gardiner   - PythonPath
 Elias Torres    - FOAF OnlineAccounts
 Jacques Distler - Template patches
 Michael Koziarski - HTTP Auth fix
 Brian Ewins     - Win32 / Portalocker
 This codebase represents a radical refactoring of Planet 2.0, which lists
 the following contributors:
--- a/planet/BeautifulSoup.py
+++ b/planet/BeautifulSoup.py
@ -66,6 +66,9 @@ except:
    if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
    name2codepoint[name]=ord(codepoint)
 # python 2.2 support
 if not hasattr(__builtins__, 'basestring'): basestring=str
 # This RE makes Beautiful Soup able to parse XML with namespaces.
 sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
@ -821,7 +824,8 @@ class SoupStrainer:
    def _matches(self, markup, matchAgainst):    
        #print "Matching %s against %s" % (markup, matchAgainst)
        result = False
-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
+        if matchAgainst == True and (not hasattr(types, 'BooleanType') or
            type(matchAgainst) == types.BooleanType):
            result = markup != None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
@ -869,7 +873,7 @@ def isString(s):
    """Convenience method that works with all 2.x versions of Python
    to determine whether or not something is stringlike."""
    try:
-        return isinstance(s, unicode) or isintance(s, basestring) 
+        return isinstance(s, unicode) or isinstance(s, basestring) 
    except NameError:
        return isinstance(s, str)
@ -1284,6 +1288,12 @@ class BeautifulStoneSoup(Tag, SGMLParser):
                j = i + len(toHandle)
        return j
    def convert_charref(self, name):
        # Re-serialize a numeric character reference as the literal text
        # '&#<name>;' instead of decoding it to a character.
        # NOTE(review): presumably overrides the SGMLParser hook of the same
        # name so references pass through the parser untouched -- confirm.
        return '&#%s;' % name
    def convert_entityref(self, name):
        # Re-serialize a named entity reference as the literal text
        # '&<name>;' instead of resolving it to a character.
        # NOTE(review): presumably overrides the SGMLParser hook of the same
        # name so references pass through the parser untouched -- confirm.
        return '&%s;' % name
 class BeautifulSoup(BeautifulStoneSoup):
    """This parser knows the following facts about HTML:
@ -1654,6 +1664,8 @@ class UnicodeDammit:
        '''Given a string and its encoding, decodes the string into Unicode.
        %encoding is a string recognized by encodings.aliases'''
        if not data: return u''
        # strip Byte Order Mark (if present)
        if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
               and (data[2:4] != '\x00\x00'):
--- a/planet/init.py
+++ b/planet/init.py
@ -67,8 +67,8 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
        options = {}
        # add original options
-        for key, value in orig_config.items(list):
+        for key in orig_config.options(list):
-            options[key] = value
+            options[key] = orig_config.get(list, key)
        try:
            if use_cache:
@ -85,7 +85,13 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
            cached_config.set(list, key, value)
        # read list
-        base = urljoin('file:', os.path.abspath(os.path.curdir))
+        curdir=getattr(os.path, 'curdir', '.')
        if sys.platform.find('win') < 0:
            base = urljoin('file:', os.path.abspath(curdir))
        else:
            path = os.path.abspath(os.path.curdir)
            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
        request = urllib2.Request(urljoin(base + '/', list))
        if options.has_key("etag"):
            request.add_header('If-None-Match', options['etag'])
--- a/planet/config.py
+++ b/planet/config.py
@ -149,11 +149,14 @@ def load(config_file):
                    config.template_directories()]
                # merge configurations, allowing current one to override theme
                template_files = config.template_files()
                parser.read(config_file)
                for file in config.bill_of_materials():
                    if not file in bom: bom.append(file)
                parser.set('Planet', 'bill_of_materials', ' '.join(bom))
                parser.set('Planet', 'template_directories', ' '.join(dirs))
                parser.set('Planet', 'template_files',
                   ' '.join(template_files + config.template_files()))
                break
        else:
            log.error('Unable to find theme %s', theme)
--- a/planet/feedparser.py
+++ b/planet/feedparser.py
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
 Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
 """
-__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs"
+__version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
 __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
 Redistribution and use in source and binary forms, with or without modification,
@ -130,6 +130,18 @@ try:
 except:
    chardet = None
 # reversable htmlentitydefs mappings for Python 2.2
 try:
  from htmlentitydefs import name2codepoint, codepoint2name
 except:
  import htmlentitydefs
  name2codepoint={}
  codepoint2name={}
  for (name,codepoint) in htmlentitydefs.entitydefs.iteritems():
    if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
    name2codepoint[name]=ord(codepoint)
    codepoint2name[ord(codepoint)]=name
 # BeautifulSoup parser used for parsing microformats from embedded HTML content
 # http://www.crummy.com/software/BeautifulSoup/.  At the moment, it appears
 # that there is a version incompatibility, so the import is replaced with
@ -574,20 +586,9 @@ class _FeedParserMixin:
            if text.startswith('&#') and text.endswith(';'):
                return self.handle_entityref(text)
        else:
-            # entity resolution graciously donated by Aaron Swartz
+            try: name2codepoint[ref]
            def name2cp(k):
                import htmlentitydefs
                if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
                    return htmlentitydefs.name2codepoint[k]
                k = htmlentitydefs.entitydefs[k]
                if k.startswith('&#x') and k.endswith(';'):
                    return int(k[3:-1],16) # not in latin-1
                if k.startswith('&#') and k.endswith(';'):
                    return int(k[2:-1]) # not in latin-1
                return ord(k)
            try: name2cp(ref)
            except KeyError: text = '&%s;' % ref
-            else: text = unichr(name2cp(ref)).encode('utf-8')
+            else: text = unichr(name2codepoint[ref]).encode('utf-8')
        self.elementstack[-1][2].append(text)
    def handle_data(self, text, escape=1):
@ -672,9 +673,9 @@ class _FeedParserMixin:
            # only if all the remaining content is nested underneath it.
            # This means that the divs would be retained in the following:
            #    <div>foo</div><div>bar</div>
-            if pieces and len(pieces)>1 and not pieces[-1].strip():
+            while pieces and len(pieces)>1 and not pieces[-1].strip():
                del pieces[-1]
-            if pieces and len(pieces)>1 and not pieces[0].strip():
+            while pieces and len(pieces)>1 and not pieces[0].strip():
                del pieces[0]
            if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>':
                depth = 0
@ -1521,6 +1522,11 @@ if _XML_AVAILABLE:
            if prefix:
                localname = prefix.lower() + ':' + localname
            elif namespace and not qname: #Expat
                for name,value in self.namespacesInUse.items():
                     if name and value == namespace:
                         localname = name + ':' + localname
                         break
            if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
            for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
@ -1546,6 +1552,11 @@ if _XML_AVAILABLE:
            prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
            if prefix:
                localname = prefix + ':' + localname
            elif namespace and not qname: #Expat
                for name,value in self.namespacesInUse.items():
                     if name and value == namespace:
                         localname = name + ':' + localname
                         break
            localname = str(localname).lower()
            self.unknown_endtag(localname)
@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
    def handle_entityref(self, ref):
        # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
        # Reconstruct the original entity reference.
-        import htmlentitydefs
+        if name2codepoint.has_key(ref):
        if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref):
            self.pieces.append('&%(ref)s;' % locals())
        else:
            self.pieces.append('&amp;%(ref)s' % locals())
@ -1705,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
 #            self.updatepos(declstartpos, i)
            return None, -1
    def convert_charref(self, name):
        # Hand back the numeric character reference in its original textual
        # form ('&#<name>;') rather than a decoded character.
        # NOTE(review): presumably overrides the sgmllib.SGMLParser hook of
        # the same name to keep references intact in the output -- confirm.
        return '&#%s;' % name
    def convert_entityref(self, name):
        # Hand back the named entity reference in its original textual form
        # ('&<name>;') rather than a resolved character.
        # NOTE(review): presumably overrides the sgmllib.SGMLParser hook of
        # the same name to keep references intact in the output -- confirm.
        return '&%s;' % name
    def output(self):
        '''Return processed HTML as a single string'''
        # Each accumulated piece is coerced with str() before being joined
        # with no separator.
        return ''.join([str(p) for p in self.pieces])
--- a/planet/htmltmpl.py
+++ b/planet/htmltmpl.py
@ -44,6 +44,7 @@ import cgi          # for HTML escaping of variables
 import urllib       # for URL escaping of variables
 import cPickle      # for template compilation
 import gettext
 import portalocker  # for locking
 INCLUDE_DIR = "inc"
@ -57,25 +58,6 @@ PARAM_ESCAPE = 2
 PARAM_GLOBAL = 3
 PARAM_GETTEXT_STRING = 1
 # Find a way to lock files. Currently implemented only for UNIX and windows.
 LOCKTYPE_FCNTL = 1
 LOCKTYPE_MSVCRT = 2
 LOCKTYPE = None
 try:
    import fcntl
 except:
    try:
        import msvcrt
    except:
        LOCKTYPE = None
    else:
        LOCKTYPE = LOCKTYPE_MSVCRT
 else:
    LOCKTYPE = LOCKTYPE_FCNTL
 LOCK_EX = 1
 LOCK_SH = 2
 LOCK_UN = 3
 ##############################################
 #          CLASS: TemplateManager            #
 ##############################################
@ -130,13 +112,6 @@ class TemplateManager:
            The <em>TemplateError</em>exception is raised when the precompiled
            template cannot be saved. Precompilation is enabled by default.
            Precompilation is available only on UNIX and Windows platforms,
            because proper file locking which is necessary to ensure
            multitask safe behaviour is platform specific and is not
            implemented for other platforms. Attempts to enable precompilation
            on the other platforms result in raise of the
            <em>TemplateError</em> exception.
            @param comments Enable or disable template comments.
            This optional parameter can be used to enable or disable
            template comments.
@ -159,13 +134,6 @@ class TemplateManager:
        self._gettext = gettext
        self._debug = debug
        # Find what module to use to lock files.
        # File locking is necessary for the 'precompile' feature to be
        # multitask/thread safe. Currently it works only on UNIX
        # and Windows. Anyone willing to implement it on Mac ?
        if precompile and not LOCKTYPE:
                raise TemplateError, "Template precompilation is not "\
                                     "available on this platform."
        self.DEB("INIT DONE")
    def prepare(self, file):
@ -260,33 +228,6 @@ class TemplateManager:
        """
        if self._debug: print >> sys.stderr, str
    def lock_file(self, file, lock):
        """ Provide platform independent file locking.
            @hidden
        """
        fd = file.fileno()
        if LOCKTYPE == LOCKTYPE_FCNTL:
            if lock == LOCK_SH:
                fcntl.flock(fd, fcntl.LOCK_SH)
            elif lock == LOCK_EX:
                fcntl.flock(fd, fcntl.LOCK_EX)
            elif lock == LOCK_UN:
                fcntl.flock(fd, fcntl.LOCK_UN)
            else:
                raise TemplateError, "BUG: bad lock in lock_file"
        elif LOCKTYPE == LOCKTYPE_MSVCRT:
            if lock == LOCK_SH:
                # msvcrt does not support shared locks :-(
                msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
            elif lock == LOCK_EX:
                msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
            elif lock == LOCK_UN:
                msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
            else:
                raise TemplateError, "BUG: bad lock in lock_file"
        else:
            raise TemplateError, "BUG: bad locktype in lock_file"
    def compile(self, file):
        """ Compile the template.
            @hidden
@ -322,7 +263,7 @@ class TemplateManager:
            file = None
            try:
                file = open(filename, "rb")
-                self.lock_file(file, LOCK_SH)
+                portalocker.lock(file, portalocker.LOCK_SH)
                precompiled = cPickle.load(file)
            except IOError, (errno, errstr):
                raise TemplateError, "IO error in load precompiled "\
@ -338,7 +279,7 @@ class TemplateManager:
                return precompiled
        finally:
            if file:
-                self.lock_file(file, LOCK_UN)
+                portalocker.unlock(file)
                file.close()
            if remove_bad and os.path.isfile(filename):
                # X: We may lose the original exception here, raising OSError.
@ -369,7 +310,7 @@ class TemplateManager:
            file = None
            try:
                file = open(filename, "wb")   # may truncate existing file
-                self.lock_file(file, LOCK_EX)
+                portalocker.lock(file, portalocker.LOCK_EX)
                BINARY = 1
                READABLE = 0
                if self._debug:
@ -393,7 +334,7 @@ class TemplateManager:
                self.DEB("SAVING PRECOMPILED")
        finally:
            if file:
-                self.lock_file(file, LOCK_UN)
+                portalocker.unlock(file)
                file.close()
            if remove_bad and os.path.isfile(filename):
                # X: We may lose the original exception here, raising OSError.
--- a/planet/portalocker.py
+++ b/planet/portalocker.py
@ -0,0 +1,93 @@
 # portalocker.py - Cross-platform (posix/nt) API for flock-style file locking.
 #                  Requires python 1.5.2 or better.
 # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203/index_txt
 # Except where otherwise noted, recipes in the Python Cookbook are 
 # published under the Python license.
 """Cross-platform (posix/nt) API for flock-style file locking.
 Synopsis:
   import portalocker
   file = open("somefile", "r+")
   portalocker.lock(file, portalocker.LOCK_EX)
   file.seek(12)
   file.write("foo")
   file.close()
 If you know what you're doing, you may choose to
   portalocker.unlock(file)
 before closing the file, but why?
 Methods:
   lock( file, flags )
   unlock( file )
 Constants:
   LOCK_EX
   LOCK_SH
   LOCK_NB
 I learned the win32 technique for locking files from sample code
 provided by John Nielsen <nielsenjf@my-deja.com> in the documentation
 that accompanies the win32 modules.
 Author: Jonathan Feinberg <jdf@pobox.com>
 Version: $Id: portalocker.py,v 1.3 2001/05/29 18:47:55 Administrator Exp $
 """
 import os
 # Choose the locking backend from os.name and define the public LOCK_EX,
 # LOCK_SH and LOCK_NB flag constants in terms of that backend.
 if os.name == 'nt':
 	# Windows: the pywin32 modules supply the locking primitives.
 	import win32con
 	import win32file
 	import pywintypes
 	LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
 	LOCK_SH = 0 # the default
 	LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
 	# is there any reason not to reuse the following structure?
 	# (a single OVERLAPPED instance is shared by every lock/unlock call)
 	__overlapped = pywintypes.OVERLAPPED()
 elif os.name == 'posix':
 	# POSIX: the constants are taken directly from fcntl.
 	import fcntl
 	LOCK_EX = fcntl.LOCK_EX
 	LOCK_SH = fcntl.LOCK_SH
 	LOCK_NB = fcntl.LOCK_NB
 else:
 	# Any other platform makes importing this module fail immediately.
 	raise RuntimeError("PortaLocker only defined for nt and posix platforms")
 if os.name == 'nt':
 	def lock(file, flags):
 		# Win32 variant: map the file object's descriptor to the underlying
 		# OS handle, then lock it with LockFileEx. `flags` is passed through
 		# unchanged; the module's LOCK_* constants are the intended values.
 		hfile = win32file._get_osfhandle(file.fileno())
 		# NOTE(review): 0 / -0x10000 look like the low/high words of the
 		# byte-range length (i.e. "the whole file") -- confirm against the
 		# pywin32 LockFileEx documentation.
 		win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
 	def unlock(file):
 		# Win32 variant: release the lock on the OS handle behind `file`,
 		# using the same 0 / -0x10000 range arguments and shared OVERLAPPED
 		# structure that lock() passes to LockFileEx.
 		hfile = win32file._get_osfhandle(file.fileno())
 		win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
 elif os.name =='posix':
 	def lock(file, flags):
 		# POSIX variant: flock() the file's descriptor; `flags` is passed
 		# through unchanged (the module's LOCK_* constants are fcntl's own).
 		fcntl.flock(file.fileno(), flags)
 	def unlock(file):
 		# POSIX variant: drop the lock by calling flock() with LOCK_UN.
 		fcntl.flock(file.fileno(), fcntl.LOCK_UN)
 # Manual demonstration: append a timestamp to log.txt while holding an
 # exclusive lock, and keep the lock until the user presses Enter.
 if __name__ == '__main__':
 	from time import time, strftime, localtime
 	import sys
 	import portalocker
 	log = open('log.txt', "a+")
 	portalocker.lock(log, portalocker.LOCK_EX)
 	timestamp = strftime("%m/%d/%Y %H:%M:%S\n", localtime(time()))
 	log.write( timestamp )
 	print "Wrote lines. Hit enter to release lock."
 	# Block on stdin so a second process can be started to observe the lock.
 	dummy = sys.stdin.readline()
 	# No explicit unlock(): the file is simply closed, as in the module
 	# docstring's synopsis.
 	log.close()
--- a/planet/reconstitute.py
+++ b/planet/reconstitute.py
@ -162,7 +162,7 @@ def content(xentry, name, detail, bozo):
    xentry.appendChild(xcontent)
-def source(xsource, source, bozo):
+def source(xsource, source, bozo, format):
    """ copy source information to the entry """
    xdoc = xsource.ownerDocument
@ -193,6 +193,9 @@ def source(xsource, source, bozo):
        if key.startswith('planet_'):
            createTextElement(xsource, key.replace('_',':',1), value)
    createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false')
    createTextElement(xsource, 'planet:format', format)
 def reconstitute(feed, entry):
    """ create an entry document from a parsed feed """
    xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
@ -222,7 +225,7 @@ def reconstitute(feed, entry):
        author(xentry, 'contributor', contributor)
    xsource = xdoc.createElement('source')
-    source(xsource, entry.get('source', feed.feed), bozo)
+    source(xsource, entry.get('source', feed.feed), bozo, feed.version)
    xentry.appendChild(xsource)
    return xdoc
--- a/planet/shell/init.py
+++ b/planet/shell/init.py
@ -2,6 +2,8 @@ import planet
 import os
 import sys
 logged_modes = []
 def run(template_file, doc, mode='template'):
    """ select a template module based on file extension and execute it """
    log = planet.getLogger(planet.config.log_level())
@ -16,7 +18,14 @@ def run(template_file, doc, mode='template'):
        template_resolved = os.path.join(template_dir, template_file)
        if os.path.exists(template_resolved): break
    else:
-        return log.error("Unable to locate %s %s", mode, template_file)
+        log.error("Unable to locate %s %s", mode, template_file)
        if not mode in logged_modes:
            log.info("%s search path:", mode)
            for template_dir in dirs:
                log.info("    %s", os.path.realpath(template_dir))
            logged_modes.append(mode)
        return
    template_resolved = os.path.realpath(template_resolved)
    # Add shell directory to the path, if not already there
    shellpath = os.path.join(sys.path[0],'planet','shell')
@ -34,13 +43,11 @@ def run(template_file, doc, mode='template'):
    # Execute the shell module
    options = planet.config.template_options(template_file)
    log.debug("Processing %s %s using %s", mode,
        os.path.realpath(template_resolved), module_name)
    if mode == 'filter':
        log.debug("Processing filer %s using %s", template_resolved,
            module_name)
        return module.run(template_resolved, doc, None, options)
    else:
        log.info("Processing template %s using %s", template_resolved,
            module_name)
        output_dir = planet.config.output_dir()
        output_file = os.path.join(output_dir, base)
        module.run(template_resolved, doc, output_file, options)
--- a/planet/spider.py
+++ b/planet/spider.py
@ -116,6 +116,9 @@ def spiderFeed(feed):
    data = feedparser.parse(feed_info.feed.get('planet_http_location',feed),
        etag=feed_info.feed.get('planet_http_etag',None), modified=modified)
    # if read failed, retain cached information
    if not data.version and feed_info.version: data.feed = feed_info.feed
    # capture http status
    if not data.has_key("status"):
        if data.has_key("entries") and len(data.entries)>0:
@ -167,32 +170,6 @@ def spiderFeed(feed):
    for name, value in config.feed_options(feed).items():
        data.feed['planet_'+name] = value
    # identify inactive feeds
    if config.activity_threshold(feed):
        activity_horizon = \
            time.gmtime(time.time()-86400*config.activity_threshold(feed))
        updated = [entry.updated_parsed for entry in data.entries
            if entry.has_key('updated_parsed')]
        updated.sort()
        if not updated or updated[-1] < activity_horizon:
            msg = "no activity in %d days" % config.activity_threshold(feed)
            log.info(msg)
            data.feed['planet_message'] = msg
    # report channel level errors
    if data.status == 403:
       data.feed['planet_message'] = "403: forbidden"
    elif data.status == 404:
       data.feed['planet_message'] = "404: not found"
    elif data.status == 408:
       data.feed['planet_message'] = "408: request timeout"
    elif data.status == 410:
       data.feed['planet_message'] = "410: gone"
    elif data.status == 500:
       data.feed['planet_message'] = "internal server error"
    elif data.status >= 400:
       data.feed['planet_message'] = "http status %s" % data.status
    # perform user configured scrub operations on the data
    scrub(feed, data)
@ -234,11 +211,37 @@ def spiderFeed(feed):
        write(output, cache_file) 
        os.utime(cache_file, (mtime, mtime))
    # identify inactive feeds
    if config.activity_threshold(feed):
        activity_horizon = \
            time.gmtime(time.time()-86400*config.activity_threshold(feed))
        updated = [entry.updated_parsed for entry in data.entries
            if entry.has_key('updated_parsed')]
        updated.sort()
        if not updated or updated[-1] < activity_horizon:
            msg = "no activity in %d days" % config.activity_threshold(feed)
            log.info(msg)
            data.feed['planet_message'] = msg
    # report channel level errors
    if data.status == 403:
       data.feed['planet_message'] = "403: forbidden"
    elif data.status == 404:
       data.feed['planet_message'] = "404: not found"
    elif data.status == 408:
       data.feed['planet_message'] = "408: request timeout"
    elif data.status == 410:
       data.feed['planet_message'] = "410: gone"
    elif data.status == 500:
       data.feed['planet_message'] = "internal server error"
    elif data.status >= 400:
       data.feed['planet_message'] = "http status %s" % data.status
    # write the feed info to the cache
    if not os.path.exists(sources): os.makedirs(sources)
    xdoc=minidom.parseString('''<feed xmlns:planet="%s"
      xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
-    reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
+    reconstitute.source(xdoc.documentElement,data.feed,data.bozo,data.version)
    write(xdoc.toxml('utf-8'), filename(sources, feed))
    xdoc.unlink()
--- a/planet/splice.py
+++ b/planet/splice.py
@ -65,7 +65,7 @@ def splice():
        if not data.feed: continue
        xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
             xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
-        reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
+        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)
    return doc
--- a/runtests.py
+++ b/runtests.py
@ -21,6 +21,10 @@ sys.path[0] = os.getcwd()
 # find all of the planet test modules
 modules = map(fullmodname, glob.glob(os.path.join('tests', 'test_*.py')))
 # enable warnings
 import planet
 planet.getLogger("WARNING")
 # load all of the tests into a suite
 suite = unittest.TestLoader().loadTestsFromNames(modules)
--- a/tests/data/reconstitute/source_bozo.xml
+++ b/tests/data/reconstitute/source_bozo.xml
@ -0,0 +1,8 @@
 <!--
 Description:  id
 Expect:       source.planet_bozo == 'false'
 -->
 <feed xmlns="http://www.w3.org/2005/Atom">
  <entry/>
 </feed>
--- a/tests/data/reconstitute/source_format.xml
+++ b/tests/data/reconstitute/source_format.xml
@ -0,0 +1,8 @@
 <!--
 Description:  id
 Expect:       source.planet_format == 'atom10'
 -->
 <feed xmlns="http://www.w3.org/2005/Atom">
  <entry/>
 </feed>
--- a/tests/test_apply.py
+++ b/tests/test_apply.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import unittest, os, shutil
-from planet import config, splice
+from planet import config, splice, logger
 from xml.dom import minidom
 workdir = 'tests/work/apply'
@ -32,7 +32,7 @@ class ApplyTest(unittest.TestCase):
        for file in ['index.html', 'default.css', 'images/foaf.png']:
            path = os.path.join(workdir, file)
            self.assertTrue(os.path.exists(path))
-            self.assertTrue(os.stat(path).st_size > 0)
+            self.assertTrue(os.stat(path).st_size > 0, file + ' has size 0')
        # verify that index.html is well formed, has content, and xml:lang
        html = open(os.path.join(workdir, 'index.html'))
@ -62,3 +62,26 @@ class ApplyTest(unittest.TestCase):
        self.assertTrue(html.find('<h1>test planet</h1>')>=0)
        self.assertTrue(html.find(
          '<h4><a href="http://example.com/2">Venus</a></h4>')>=0)
 # If libxml2 cannot be imported, check whether the xsltproc command can be
 # run instead; when it cannot, remove every test_* method from ApplyTest.
 try:
    import libxml2
 except ImportError:
    try:
        # First attempt: run 'xsltproc -V' through win32pipe.
        import win32pipe
        (stdin,stdout) = win32pipe.popen4('xsltproc -V', 't')
        stdin.close()
        stdout.read()
        try:
            exitcode = stdout.close()
        except IOError:
            exitcode = -1
    except:
        # Any failure above (including win32pipe being unavailable) falls
        # back to the commands module.
        import commands
        (exitstatus,output) = commands.getstatusoutput('xsltproc -V')
        # Keep bits 8-15 of the returned status.
        # NOTE(review): presumably the process exit code -- confirm.
        exitcode = ((exitstatus>>8) & 0xFF)
    if exitcode:
        # A truthy exit code is treated as "xsltproc not available".
        logger.warn("xsltproc is not available => can't test XSLT templates")
        for method in dir(ApplyTest):
            if method.startswith('test_'):  delattr(ApplyTest,method)
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import unittest, xml.dom.minidom
-from planet import shell, config
+from planet import shell, config, logger
 class FilterTests(unittest.TestCase):
@ -80,7 +80,10 @@ try:
    from subprocess import Popen, PIPE
    sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
    sed.communicate()
-    if sed.returncode != 0: raise Exception
+    if sed.returncode != 0:
-except:
+        logger.warn("sed is not available => can't test stripAd_yahoo")
-    # sed is not available
+        del FilterTests.test_stripAd_yahoo
-    del FilterTests.test_stripAd_yahoo
+except ImportError:
    logger.warn("Popen is not available => can't test filters")
    for method in dir(FilterTests):
        if method.startswith('test_'):  delattr(FilterTests,method)
--- a/tests/test_foaf.py
+++ b/tests/test_foaf.py
@ -3,7 +3,7 @@
 import unittest, os, shutil
 from planet.foaf import foaf2config
 from ConfigParser import ConfigParser
-from planet import config
+from planet import config, logger
 workdir = 'tests/work/config/cache'
@ -119,6 +119,7 @@ class FoafTest(unittest.TestCase):
 try:
    import RDF
 except:
    logger.warn("Redland RDF is not available => can't test FOAF reading lists")
    for key in FoafTest.__dict__.keys():
        if key.startswith('test_'): delattr(FoafTest, key)
--- a/tests/test_rlists.py
+++ b/tests/test_rlists.py
@ -6,7 +6,7 @@ from os.path import split
 from glob import glob
 from ConfigParser import ConfigParser
-workdir = 'tests/work/config/cache'
+workdir = os.path.join('tests', 'work', 'config', 'cache')
 class ReadingListTest(unittest.TestCase):
    def setUp(self):
@ -38,7 +38,7 @@ class ReadingListTest(unittest.TestCase):
    def test_cache(self):
        cache = glob(os.path.join(workdir,'lists','*'))
-        self.assertTrue(1,len(cache))
+        self.assertEqual(1,len(cache))
        parser = ConfigParser()
        parser.read(cache[0])
--- a/tests/test_spider.py
+++ b/tests/test_spider.py
@ -26,11 +26,13 @@ class SpiderTest(unittest.TestCase):
        os.removedirs(os.path.split(workdir)[0])
    def test_filename(self):
-        self.assertEqual('./example.com,index.html',
+        self.assertEqual(os.path.join('.', 'example.com,index.html'),
            filename('.', 'http://example.com/index.html'))
-        self.assertEqual('./planet.intertwingly.net,2006,testfeed1,1',
+        self.assertEqual(os.path.join('.',
            'planet.intertwingly.net,2006,testfeed1,1'),
            filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1'))
-        self.assertEqual('./00000000-0000-0000-0000-000000000000',
+        self.assertEqual(os.path.join('.',
            '00000000-0000-0000-0000-000000000000'),
            filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000'))
        # Requires Python 2.3
@ -38,7 +40,7 @@ class SpiderTest(unittest.TestCase):
            import encodings.idna
        except:
            return
-        self.assertEqual('./xn--8ws00zhy3a.com',
+        self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'),
            filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))
    def test_spiderFeed(self):
@ -51,8 +53,8 @@ class SpiderTest(unittest.TestCase):
        self.assertEqual(5, len(files))
        # verify that the file names are as expected
-        self.assertTrue(workdir + 
+        self.assertTrue(os.path.join(workdir,
-            '/planet.intertwingly.net,2006,testfeed1,1' in files)
+            'planet.intertwingly.net,2006,testfeed1,1') in files)
        # verify that the file timestamps match atom:updated
        data = feedparser.parse(files[2])
@ -73,10 +75,10 @@ class SpiderTest(unittest.TestCase):
        self.assertEqual(13, len(files))
        # verify that the file names are as expected
-        self.assertTrue(workdir + 
+        self.assertTrue(os.path.join(workdir,
-            '/planet.intertwingly.net,2006,testfeed1,1' in files)
+            'planet.intertwingly.net,2006,testfeed1,1') in files)
-        self.assertTrue(workdir + 
+        self.assertTrue(os.path.join(workdir,
-            '/planet.intertwingly.net,2006,testfeed2,1' in files)
+            'planet.intertwingly.net,2006,testfeed2,1') in files)
        data = feedparser.parse(workdir + 
            '/planet.intertwingly.net,2006,testfeed3,1')