Updates from Sam Ruby.

2006-09-24 15:43:19 -05:00 · 2006-09-24 15:43:19 -05:00 · 2deb9bcf3d
commit 2deb9bcf3d
parent c5896465b0 4b0cd8d5d9
19 changed files with 276 additions and 142 deletions
--- a/1
+++ b/1
@ -3,6 +3,7 @@ Mary Gardiner   - PythonPath
 Elias Torres    - FOAF OnlineAccounts
 Jacques Distler - Template patches
 Michael Koziarski - HTTP Auth fix
+Brian Ewins     - Win32 / Portalocker

 This codebase represents a radical refactoring of Planet 2.0, which lists
 the following contributors:
--- a/planet/BeautifulSoup.py
+++ b/planet/BeautifulSoup.py
@ -66,6 +66,9 @@ except:
    if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
    name2codepoint[name]=ord(codepoint)

+# python 2.2 support; __builtins__ can be a dict, so test the name directly
+try: basestring
+except NameError: basestring=str
+
 # This RE makes Beautiful Soup able to parse XML with namespaces.
 sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')

@ -821,7 +824,8 @@ class SoupStrainer:
    def _matches(self, markup, matchAgainst):    
        #print "Matching %s against %s" % (markup, matchAgainst)
        result = False
-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
+        if matchAgainst == True and (not hasattr(types, 'BooleanType') or
+            type(matchAgainst) == types.BooleanType):
            result = markup != None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
@ -869,7 +873,7 @@ def isString(s):
    """Convenience method that works with all 2.x versions of Python
    to determine whether or not something is stringlike."""
    try:
-        return isinstance(s, unicode) or isintance(s, basestring) 
+        return isinstance(s, unicode) or isinstance(s, basestring) 
    except NameError:
        return isinstance(s, str)

@ -1284,6 +1288,12 @@ class BeautifulStoneSoup(Tag, SGMLParser):
                j = i + len(toHandle)
        return j

+    def convert_charref(self, name):
+        return '&#%s;' % name
+
+    def convert_entityref(self, name):
+        return '&%s;' % name
+
 class BeautifulSoup(BeautifulStoneSoup):

    """This parser knows the following facts about HTML:
@ -1654,6 +1664,8 @@ class UnicodeDammit:
        '''Given a string and its encoding, decodes the string into Unicode.
        %encoding is a string recognized by encodings.aliases'''

+        if not data: return u''
+
        # strip Byte Order Mark (if present)
        if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
               and (data[2:4] != '\x00\x00'):
--- a/planet/__init__.py
+++ b/planet/__init__.py
@ -67,8 +67,8 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
        options = {}

        # add original options
-        for key, value in orig_config.items(list):
-            options[key] = value
+        for key in orig_config.options(list):
+            options[key] = orig_config.get(list, key)
            
        try:
            if use_cache:
@ -85,7 +85,13 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
            cached_config.set(list, key, value)

        # read list
-        base = urljoin('file:', os.path.abspath(os.path.curdir))
+        # 'darwin' also contains 'win', so test the prefix rather than find()
+        curdir=getattr(os.path, 'curdir', '.')
+        if not sys.platform.startswith('win'):
+            base = urljoin('file:', os.path.abspath(curdir))
+        else:
+            # Windows: turn C:\dir into the file:///C|/dir URL form
+            path = os.path.abspath(curdir)
+            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
+
        request = urllib2.Request(urljoin(base + '/', list))
        if options.has_key("etag"):
            request.add_header('If-None-Match', options['etag'])
--- a/planet/config.py
+++ b/planet/config.py
@ -149,11 +149,14 @@ def load(config_file):
                    config.template_directories()]

                # merge configurations, allowing current one to override theme
+                template_files = config.template_files()
                parser.read(config_file)
                for file in config.bill_of_materials():
                    if not file in bom: bom.append(file)
                parser.set('Planet', 'bill_of_materials', ' '.join(bom))
                parser.set('Planet', 'template_directories', ' '.join(dirs))
+                parser.set('Planet', 'template_files',
+                   ' '.join(template_files + config.template_files()))
                break
        else:
            log.error('Unable to find theme %s', theme)
--- a/planet/feedparser.py
+++ b/planet/feedparser.py
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
 Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
 """

-__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs"
+__version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
 __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
@ -130,6 +130,18 @@ try:
 except:
    chardet = None

+# reversible htmlentitydefs mappings for Python 2.2
+try:
+  from htmlentitydefs import name2codepoint, codepoint2name
+except:
+  import htmlentitydefs
+  name2codepoint={}
+  codepoint2name={}
+  for (name,codepoint) in htmlentitydefs.entitydefs.iteritems():
+    if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
+    name2codepoint[name]=ord(codepoint)
+    codepoint2name[ord(codepoint)]=name
+
 # BeautifulSoup parser used for parsing microformats from embedded HTML content
 # http://www.crummy.com/software/BeautifulSoup/.  At the moment, it appears
 # that there is a version incompatibility, so the import is replaced with
@ -574,20 +586,9 @@ class _FeedParserMixin:
            if text.startswith('&#') and text.endswith(';'):
                return self.handle_entityref(text)
        else:
-            # entity resolution graciously donated by Aaron Swartz
-            def name2cp(k):
-                import htmlentitydefs
-                if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
-                    return htmlentitydefs.name2codepoint[k]
-                k = htmlentitydefs.entitydefs[k]
-                if k.startswith('&#x') and k.endswith(';'):
-                    return int(k[3:-1],16) # not in latin-1
-                if k.startswith('&#') and k.endswith(';'):
-                    return int(k[2:-1]) # not in latin-1
-                return ord(k)
-            try: name2cp(ref)
+            try: name2codepoint[ref]
            except KeyError: text = '&%s;' % ref
-            else: text = unichr(name2cp(ref)).encode('utf-8')
+            else: text = unichr(name2codepoint[ref]).encode('utf-8')
        self.elementstack[-1][2].append(text)

    def handle_data(self, text, escape=1):
@ -672,9 +673,9 @@ class _FeedParserMixin:
            # only if all the remaining content is nested underneath it.
            # This means that the divs would be retained in the following:
            #    <div>foo</div><div>bar</div>
-            if pieces and len(pieces)>1 and not pieces[-1].strip():
+            while pieces and len(pieces)>1 and not pieces[-1].strip():
                del pieces[-1]
-            if pieces and len(pieces)>1 and not pieces[0].strip():
+            while pieces and len(pieces)>1 and not pieces[0].strip():
                del pieces[0]
            if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>':
                depth = 0
@ -1521,6 +1522,11 @@ if _XML_AVAILABLE:

            if prefix:
                localname = prefix.lower() + ':' + localname
+            elif namespace and not qname: #Expat
+                for name,value in self.namespacesInUse.items():
+                     if name and value == namespace:
+                         localname = name + ':' + localname
+                         break
            if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))

            for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
@ -1546,6 +1552,11 @@ if _XML_AVAILABLE:
            prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
            if prefix:
                localname = prefix + ':' + localname
+            elif namespace and not qname: #Expat
+                for name,value in self.namespacesInUse.items():
+                     if name and value == namespace:
+                         localname = name + ':' + localname
+                         break
            localname = str(localname).lower()
            self.unknown_endtag(localname)

@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
    def handle_entityref(self, ref):
        # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
        # Reconstruct the original entity reference.
-        import htmlentitydefs
-        if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref):
+        if name2codepoint.has_key(ref):
            self.pieces.append('&%(ref)s;' % locals())
        else:
            self.pieces.append('&amp;%(ref)s' % locals())
@ -1705,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
 #            self.updatepos(declstartpos, i)
            return None, -1

+    def convert_charref(self, name):
+        return '&#%s;' % name
+
+    def convert_entityref(self, name):
+        return '&%s;' % name
+
    def output(self):
        '''Return processed HTML as a single string'''
        return ''.join([str(p) for p in self.pieces])
--- a/planet/htmltmpl.py
+++ b/planet/htmltmpl.py
@ -44,6 +44,7 @@ import cgi          # for HTML escaping of variables
 import urllib       # for URL escaping of variables
 import cPickle      # for template compilation
 import gettext
+import portalocker  # for locking

 INCLUDE_DIR = "inc"

@ -57,25 +58,6 @@ PARAM_ESCAPE = 2
 PARAM_GLOBAL = 3
 PARAM_GETTEXT_STRING = 1

-# Find a way to lock files. Currently implemented only for UNIX and windows.
-LOCKTYPE_FCNTL = 1
-LOCKTYPE_MSVCRT = 2
-LOCKTYPE = None
-try:
-    import fcntl
-except:
-    try:
-        import msvcrt
-    except:
-        LOCKTYPE = None
-    else:
-        LOCKTYPE = LOCKTYPE_MSVCRT
-else:
-    LOCKTYPE = LOCKTYPE_FCNTL
-LOCK_EX = 1
-LOCK_SH = 2
-LOCK_UN = 3
-
 ##############################################
 #          CLASS: TemplateManager            #
 ##############################################
@ -129,13 +111,6 @@ class TemplateManager:

            The <em>TemplateError</em>exception is raised when the precompiled
            template cannot be saved. Precompilation is enabled by default.
-
-            Precompilation is available only on UNIX and Windows platforms,
-            because proper file locking which is necessary to ensure
-            multitask safe behaviour is platform specific and is not
-            implemented for other platforms. Attempts to enable precompilation
-            on the other platforms result in raise of the
-            <em>TemplateError</em> exception.
            
            @param comments Enable or disable template comments.
            This optional parameter can be used to enable or disable
@ -159,13 +134,6 @@ class TemplateManager:
        self._gettext = gettext
        self._debug = debug

-        # Find what module to use to lock files.
-        # File locking is necessary for the 'precompile' feature to be
-        # multitask/thread safe. Currently it works only on UNIX
-        # and Windows. Anyone willing to implement it on Mac ?
-        if precompile and not LOCKTYPE:
-                raise TemplateError, "Template precompilation is not "\
-                                     "available on this platform."
        self.DEB("INIT DONE")

    def prepare(self, file):
@ -260,33 +228,6 @@ class TemplateManager:
        """
        if self._debug: print >> sys.stderr, str

-    def lock_file(self, file, lock):
-        """ Provide platform independent file locking.
-            @hidden
-        """
-        fd = file.fileno()
-        if LOCKTYPE == LOCKTYPE_FCNTL:
-            if lock == LOCK_SH:
-                fcntl.flock(fd, fcntl.LOCK_SH)
-            elif lock == LOCK_EX:
-                fcntl.flock(fd, fcntl.LOCK_EX)
-            elif lock == LOCK_UN:
-                fcntl.flock(fd, fcntl.LOCK_UN)
-            else:
-                raise TemplateError, "BUG: bad lock in lock_file"
-        elif LOCKTYPE == LOCKTYPE_MSVCRT:
-            if lock == LOCK_SH:
-                # msvcrt does not support shared locks :-(
-                msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
-            elif lock == LOCK_EX:
-                msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
-            elif lock == LOCK_UN:
-                msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
-            else:
-                raise TemplateError, "BUG: bad lock in lock_file"
-        else:
-            raise TemplateError, "BUG: bad locktype in lock_file"
-
    def compile(self, file):
        """ Compile the template.
            @hidden
@ -322,7 +263,7 @@ class TemplateManager:
            file = None
            try:
                file = open(filename, "rb")
-                self.lock_file(file, LOCK_SH)
+                portalocker.lock(file, portalocker.LOCK_SH)
                precompiled = cPickle.load(file)
            except IOError, (errno, errstr):
                raise TemplateError, "IO error in load precompiled "\
@ -338,7 +279,7 @@ class TemplateManager:
                return precompiled
        finally:
            if file:
-                self.lock_file(file, LOCK_UN)
+                portalocker.unlock(file)
                file.close()
            if remove_bad and os.path.isfile(filename):
                # X: We may lose the original exception here, raising OSError.
@ -369,7 +310,7 @@ class TemplateManager:
            file = None
            try:
                file = open(filename, "wb")   # may truncate existing file
-                self.lock_file(file, LOCK_EX)
+                portalocker.lock(file, portalocker.LOCK_EX)
                BINARY = 1
                READABLE = 0
                if self._debug:
@ -393,7 +334,7 @@ class TemplateManager:
                self.DEB("SAVING PRECOMPILED")
        finally:
            if file:
-                self.lock_file(file, LOCK_UN)
+                portalocker.unlock(file)
                file.close()
            if remove_bad and os.path.isfile(filename):
                # X: We may lose the original exception here, raising OSError.
--- a/planet/portalocker.py
+++ b/planet/portalocker.py
@ -0,0 +1,93 @@
+# portalocker.py - Cross-platform (posix/nt) API for flock-style file locking.
+#                  Requires python 1.5.2 or better.
+# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203/index_txt
+# Except where otherwise noted, recipes in the Python Cookbook are 
+# published under the Python license.
+
+"""Cross-platform (posix/nt) API for flock-style file locking.
+
+Synopsis:
+
+   import portalocker
+   file = open("somefile", "r+")
+   portalocker.lock(file, portalocker.LOCK_EX)
+   file.seek(12)
+   file.write("foo")
+   file.close()
+
+If you know what you're doing, you may choose to
+
+   portalocker.unlock(file)
+
+before closing the file, but why?
+
+Methods:
+
+   lock( file, flags )
+   unlock( file )
+
+Constants:
+
+   LOCK_EX
+   LOCK_SH
+   LOCK_NB
+
+I learned the win32 technique for locking files from sample code
+provided by John Nielsen <nielsenjf@my-deja.com> in the documentation
+that accompanies the win32 modules.
+
+Author: Jonathan Feinberg <jdf@pobox.com>
+Version: $Id: portalocker.py,v 1.3 2001/05/29 18:47:55 Administrator Exp $
+"""
+
+import os
+
+if os.name == 'nt':
+	import win32con
+	import win32file
+	import pywintypes
+	LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
+	LOCK_SH = 0 # the default
+	LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
+	# is there any reason not to reuse the following structure?
+	__overlapped = pywintypes.OVERLAPPED()
+elif os.name == 'posix':
+	import fcntl
+	LOCK_EX = fcntl.LOCK_EX
+	LOCK_SH = fcntl.LOCK_SH
+	LOCK_NB = fcntl.LOCK_NB
+else:
+	raise RuntimeError("PortaLocker only defined for nt and posix platforms")
+
+if os.name == 'nt':
+	def lock(file, flags):
+		hfile = win32file._get_osfhandle(file.fileno())
+		win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
+
+	def unlock(file):
+		hfile = win32file._get_osfhandle(file.fileno())
+		win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
+
+elif os.name =='posix':
+	def lock(file, flags):
+		fcntl.flock(file.fileno(), flags)
+
+	def unlock(file):
+		fcntl.flock(file.fileno(), fcntl.LOCK_UN)
+
+if __name__ == '__main__':
+	from time import time, strftime, localtime
+	import sys
+	import portalocker
+
+	log = open('log.txt', "a+")
+	portalocker.lock(log, portalocker.LOCK_EX)
+
+	timestamp = strftime("%m/%d/%Y %H:%M:%S\n", localtime(time()))
+	log.write( timestamp )
+
+	print "Wrote lines. Hit enter to release lock."
+	dummy = sys.stdin.readline()
+
+	log.close()
+
--- a/planet/reconstitute.py
+++ b/planet/reconstitute.py
@ -162,7 +162,7 @@ def content(xentry, name, detail, bozo):

    xentry.appendChild(xcontent)

-def source(xsource, source, bozo):
+def source(xsource, source, bozo, format):
    """ copy source information to the entry """
    xdoc = xsource.ownerDocument

@ -193,6 +193,9 @@ def source(xsource, source, bozo):
        if key.startswith('planet_'):
            createTextElement(xsource, key.replace('_',':',1), value)

+    createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false')
+    createTextElement(xsource, 'planet:format', format)
+
 def reconstitute(feed, entry):
    """ create an entry document from a parsed feed """
    xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
@ -222,7 +225,7 @@ def reconstitute(feed, entry):
        author(xentry, 'contributor', contributor)

    xsource = xdoc.createElement('source')
-    source(xsource, entry.get('source', feed.feed), bozo)
+    source(xsource, entry.get('source', feed.feed), bozo, feed.version)
    xentry.appendChild(xsource)

    return xdoc
--- a/planet/shell/__init__.py
+++ b/planet/shell/__init__.py
@ -2,6 +2,8 @@ import planet
 import os
 import sys

+logged_modes = []
+
 def run(template_file, doc, mode='template'):
    """ select a template module based on file extension and execute it """
    log = planet.getLogger(planet.config.log_level())
@ -16,7 +18,14 @@ def run(template_file, doc, mode='template'):
        template_resolved = os.path.join(template_dir, template_file)
        if os.path.exists(template_resolved): break
    else:
-        return log.error("Unable to locate %s %s", mode, template_file)
+        log.error("Unable to locate %s %s", mode, template_file)
+        if not mode in logged_modes:
+            log.info("%s search path:", mode)
+            for template_dir in dirs:
+                log.info("    %s", os.path.realpath(template_dir))
+            logged_modes.append(mode)
+        return
+    template_resolved = os.path.realpath(template_resolved)

    # Add shell directory to the path, if not already there
    shellpath = os.path.join(sys.path[0],'planet','shell')
@ -34,13 +43,11 @@ def run(template_file, doc, mode='template'):

    # Execute the shell module
    options = planet.config.template_options(template_file)
+    log.debug("Processing %s %s using %s", mode,
+        os.path.realpath(template_resolved), module_name)
    if mode == 'filter':
-        log.debug("Processing filer %s using %s", template_resolved,
-            module_name)
        return module.run(template_resolved, doc, None, options)
    else:
-        log.info("Processing template %s using %s", template_resolved,
-            module_name)
        output_dir = planet.config.output_dir()
        output_file = os.path.join(output_dir, base)
        module.run(template_resolved, doc, output_file, options)
--- a/planet/spider.py
+++ b/planet/spider.py
@ -116,6 +116,9 @@ def spiderFeed(feed):
    data = feedparser.parse(feed_info.feed.get('planet_http_location',feed),
        etag=feed_info.feed.get('planet_http_etag',None), modified=modified)

+    # if read failed, retain cached information
+    if not data.version and feed_info.version: data.feed = feed_info.feed
+
    # capture http status
    if not data.has_key("status"):
        if data.has_key("entries") and len(data.entries)>0:
@ -166,32 +169,6 @@ def spiderFeed(feed):
            {'rel':'self', 'type':'application/atom+xml', 'href':feed}))
    for name, value in config.feed_options(feed).items():
        data.feed['planet_'+name] = value
-    
-    # identify inactive feeds
-    if config.activity_threshold(feed):
-        activity_horizon = \
-            time.gmtime(time.time()-86400*config.activity_threshold(feed))
-        updated = [entry.updated_parsed for entry in data.entries
-            if entry.has_key('updated_parsed')]
-        updated.sort()
-        if not updated or updated[-1] < activity_horizon:
-            msg = "no activity in %d days" % config.activity_threshold(feed)
-            log.info(msg)
-            data.feed['planet_message'] = msg
-
-    # report channel level errors
-    if data.status == 403:
-       data.feed['planet_message'] = "403: forbidden"
-    elif data.status == 404:
-       data.feed['planet_message'] = "404: not found"
-    elif data.status == 408:
-       data.feed['planet_message'] = "408: request timeout"
-    elif data.status == 410:
-       data.feed['planet_message'] = "410: gone"
-    elif data.status == 500:
-       data.feed['planet_message'] = "internal server error"
-    elif data.status >= 400:
-       data.feed['planet_message'] = "http status %s" % data.status

    # perform user configured scrub operations on the data
    scrub(feed, data)
@ -233,12 +210,38 @@ def spiderFeed(feed):
        # write out and timestamp the results
        write(output, cache_file) 
        os.utime(cache_file, (mtime, mtime))
+    
+    # identify inactive feeds
+    if config.activity_threshold(feed):
+        activity_horizon = \
+            time.gmtime(time.time()-86400*config.activity_threshold(feed))
+        updated = [entry.updated_parsed for entry in data.entries
+            if entry.has_key('updated_parsed')]
+        updated.sort()
+        if not updated or updated[-1] < activity_horizon:
+            msg = "no activity in %d days" % config.activity_threshold(feed)
+            log.info(msg)
+            data.feed['planet_message'] = msg
+
+    # report channel level errors
+    if data.status == 403:
+       data.feed['planet_message'] = "403: forbidden"
+    elif data.status == 404:
+       data.feed['planet_message'] = "404: not found"
+    elif data.status == 408:
+       data.feed['planet_message'] = "408: request timeout"
+    elif data.status == 410:
+       data.feed['planet_message'] = "410: gone"
+    elif data.status == 500:
+       data.feed['planet_message'] = "internal server error"
+    elif data.status >= 400:
+       data.feed['planet_message'] = "http status %s" % data.status

    # write the feed info to the cache
    if not os.path.exists(sources): os.makedirs(sources)
    xdoc=minidom.parseString('''<feed xmlns:planet="%s"
      xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
-    reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
+    reconstitute.source(xdoc.documentElement,data.feed,data.bozo,data.version)
    write(xdoc.toxml('utf-8'), filename(sources, feed))
    xdoc.unlink()

--- a/planet/splice.py
+++ b/planet/splice.py
@ -65,7 +65,7 @@ def splice():
        if not data.feed: continue
        xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
             xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
-        reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
+        reconstitute.source(xdoc.documentElement, data.feed, None, None)
        feed.appendChild(xdoc.documentElement)

    return doc
--- a/runtests.py
+++ b/runtests.py
@ -21,6 +21,10 @@ sys.path[0] = os.getcwd()
 # find all of the planet test modules
 modules = map(fullmodname, glob.glob(os.path.join('tests', 'test_*.py')))

+# enable warnings
+import planet
+planet.getLogger("WARNING")
+
 # load all of the tests into a suite
 suite = unittest.TestLoader().loadTestsFromNames(modules)

--- a/tests/data/reconstitute/source_bozo.xml
+++ b/tests/data/reconstitute/source_bozo.xml
@ -0,0 +1,8 @@
+<!--
+Description:  id
+Expect:       source.planet_bozo == 'false'
+-->
+
+<feed xmlns="http://www.w3.org/2005/Atom">
+  <entry/>
+</feed>
--- a/tests/data/reconstitute/source_format.xml
+++ b/tests/data/reconstitute/source_format.xml
@ -0,0 +1,8 @@
+<!--
+Description:  id
+Expect:       source.planet_format == 'atom10'
+-->
+
+<feed xmlns="http://www.w3.org/2005/Atom">
+  <entry/>
+</feed>
--- a/tests/test_apply.py
+++ b/tests/test_apply.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python

 import unittest, os, shutil
-from planet import config, splice
+from planet import config, splice, logger
 from xml.dom import minidom

 workdir = 'tests/work/apply'
@ -32,7 +32,7 @@ class ApplyTest(unittest.TestCase):
        for file in ['index.html', 'default.css', 'images/foaf.png']:
            path = os.path.join(workdir, file)
            self.assertTrue(os.path.exists(path))
-            self.assertTrue(os.stat(path).st_size > 0)
+            self.assertTrue(os.stat(path).st_size > 0, file + ' has size 0')

        # verify that index.html is well formed, has content, and xml:lang
        html = open(os.path.join(workdir, 'index.html'))
@ -62,3 +62,26 @@ class ApplyTest(unittest.TestCase):
        self.assertTrue(html.find('<h1>test planet</h1>')>=0)
        self.assertTrue(html.find(
          '<h4><a href="http://example.com/2">Venus</a></h4>')>=0)
+
+try:
+    import libxml2
+except ImportError:
+
+    try:
+        import win32pipe
+        (stdin,stdout) = win32pipe.popen4('xsltproc -V', 't')
+        stdin.close()
+        stdout.read()
+        try:
+            exitcode = stdout.close()
+        except IOError:
+            exitcode = -1
+    except:
+        import commands
+        (exitstatus,output) = commands.getstatusoutput('xsltproc -V')
+        exitcode = ((exitstatus>>8) & 0xFF)
+
+    if exitcode:
+        logger.warn("xsltproc is not available => can't test XSLT templates")
+        for method in dir(ApplyTest):
+            if method.startswith('test_'):  delattr(ApplyTest,method)
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python

 import unittest, xml.dom.minidom
-from planet import shell, config
+from planet import shell, config, logger

 class FilterTests(unittest.TestCase):

@ -80,7 +80,10 @@ try:
    from subprocess import Popen, PIPE
    sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
    sed.communicate()
-    if sed.returncode != 0: raise Exception
-except:
-    # sed is not available
-    del FilterTests.test_stripAd_yahoo
+    if sed.returncode != 0:
+        logger.warn("sed is not available => can't test stripAd_yahoo")
+        del FilterTests.test_stripAd_yahoo
+except ImportError:
+    logger.warn("Popen is not available => can't test filters")
+    for method in dir(FilterTests):
+        if method.startswith('test_'):  delattr(FilterTests,method)
+except OSError:
+    # Popen raises OSError when the sed executable itself cannot be found
+    logger.warn("sed is not available => can't test stripAd_yahoo")
+    del FilterTests.test_stripAd_yahoo
--- a/tests/test_foaf.py
+++ b/tests/test_foaf.py
@ -3,7 +3,7 @@
 import unittest, os, shutil
 from planet.foaf import foaf2config
 from ConfigParser import ConfigParser
-from planet import config
+from planet import config, logger

 workdir = 'tests/work/config/cache'

@ -119,6 +119,7 @@ class FoafTest(unittest.TestCase):
 try:
    import RDF
 except:
+    logger.warn("Redland RDF is not available => can't test FOAF reading lists")
    for key in FoafTest.__dict__.keys():
        if key.startswith('test_'): delattr(FoafTest, key)

--- a/tests/test_rlists.py
+++ b/tests/test_rlists.py
@ -6,7 +6,7 @@ from os.path import split
 from glob import glob
 from ConfigParser import ConfigParser

-workdir = 'tests/work/config/cache'
+workdir = os.path.join('tests', 'work', 'config', 'cache')

 class ReadingListTest(unittest.TestCase):
    def setUp(self):
@ -38,7 +38,7 @@ class ReadingListTest(unittest.TestCase):

    def test_cache(self):
        cache = glob(os.path.join(workdir,'lists','*'))
-        self.assertTrue(1,len(cache))
+        self.assertEqual(1,len(cache))

        parser = ConfigParser()
        parser.read(cache[0])
--- a/tests/test_spider.py
+++ b/tests/test_spider.py
@ -26,11 +26,13 @@ class SpiderTest(unittest.TestCase):
        os.removedirs(os.path.split(workdir)[0])

    def test_filename(self):
-        self.assertEqual('./example.com,index.html',
+        self.assertEqual(os.path.join('.', 'example.com,index.html'),
            filename('.', 'http://example.com/index.html'))
-        self.assertEqual('./planet.intertwingly.net,2006,testfeed1,1',
+        self.assertEqual(os.path.join('.',
+            'planet.intertwingly.net,2006,testfeed1,1'),
            filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1'))
-        self.assertEqual('./00000000-0000-0000-0000-000000000000',
+        self.assertEqual(os.path.join('.',
+            '00000000-0000-0000-0000-000000000000'),
            filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000'))

        # Requires Python 2.3
@ -38,7 +40,7 @@ class SpiderTest(unittest.TestCase):
            import encodings.idna
        except:
            return
-        self.assertEqual('./xn--8ws00zhy3a.com',
+        self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'),
            filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))

    def test_spiderFeed(self):
@ -51,8 +53,8 @@ class SpiderTest(unittest.TestCase):
        self.assertEqual(5, len(files))

        # verify that the file names are as expected
-        self.assertTrue(workdir + 
-            '/planet.intertwingly.net,2006,testfeed1,1' in files)
+        self.assertTrue(os.path.join(workdir,
+            'planet.intertwingly.net,2006,testfeed1,1') in files)

        # verify that the file timestamps match atom:updated
        data = feedparser.parse(files[2])
@ -73,10 +75,10 @@ class SpiderTest(unittest.TestCase):
        self.assertEqual(13, len(files))

        # verify that the file names are as expected
-        self.assertTrue(workdir + 
-            '/planet.intertwingly.net,2006,testfeed1,1' in files)
-        self.assertTrue(workdir + 
-            '/planet.intertwingly.net,2006,testfeed2,1' in files)
+        self.assertTrue(os.path.join(workdir,
+            'planet.intertwingly.net,2006,testfeed1,1') in files)
+        self.assertTrue(os.path.join(workdir,
+            'planet.intertwingly.net,2006,testfeed2,1') in files)

        data = feedparser.parse(workdir + 
            '/planet.intertwingly.net,2006,testfeed3,1')