Initial load

This commit is contained in:
Sam Ruby 2006-08-16 11:54:54 -04:00
commit b31973d514
79 changed files with 9907 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 469 B

BIN
examples/images/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

150
examples/planet.css Normal file
View File

@ -0,0 +1,150 @@
/* Stylesheet for the example planet page (generated via planet.xslt).
   Layout: a fluid main column plus a fixed 200px-wide .sidebar absolutely
   positioned at the top right; body reserves that width via margin-right. */

body {
    border-right: 1px solid black;
    margin-right: 200px;       /* room for the 200px-wide .sidebar below */
    padding-left: 20px;
    padding-right: 20px;
}

/* Page title: lowercased, right-aligned, grey. */
h1 {
    margin-top: 0px;
    padding-top: 20px;
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    letter-spacing: -2px;
    text-transform: lowercase;
    text-align: right;
    color: grey;
}

.admin {
    text-align: right;
}

/* Date headings (h2.date in the generated page). */
h2 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    color: #200080;
    margin-left: -20px;    /* hang into the body's left padding */
}

/* Per-entry source/title bar. */
h3 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    background-color: #a0c0ff;
    border: 1px solid #5080b0;
    padding: 4px;
}

h3 a {
    text-decoration: none;
    color: inherit;
}

h4 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: bold;
}

h4 a {
    text-decoration: none;
    color: inherit;
}

img.face {
    float: right;
    margin-top: -3em;
}

/* One aggregated feed entry. */
.entry {
    margin-bottom: 2em;
}

.entry .date {
    font-family: "Bitstream Vera Sans", sans-serif;
    color: grey;
}

.entry .date a {
    text-decoration: none;
    color: inherit;
}

/* Subscription sidebar: pinned to the top-right corner, matching the
   200px margin reserved on body above. */
.sidebar {
    position: absolute;
    top: 0px;
    right: 0px;
    width: 200px;
    margin-left: 0px;
    margin-right: 0px;
    padding-right: 0px;
    padding-top: 20px;
    padding-left: 0px;
    font-family: "Bitstream Vera Sans", sans-serif;
    font-size: 85%;
}

.sidebar h2 {
    font-size: 110%;
    font-weight: bold;
    color: black;
    padding-left: 5px;
    margin-left: 0px;
}

.sidebar ul {
    padding-left: 1em;
    margin-left: 0px;
    list-style-type: none;
}

.sidebar ul li:hover {
    color: grey;
}

.sidebar ul li a {
    text-decoration: none;
}

.sidebar ul li a:hover {
    text-decoration: underline;
}

.sidebar ul li a img {
    border: 0;
}

.sidebar p {
    border-top: 1px solid grey;
    margin-top: 30px;
    padding-top: 10px;
    padding-left: 5px;
}

/* Dashed-underline "help" affordance for status messages. */
.sidebar .message {
    cursor: help;
    border-bottom: 1px dashed red;
}

.sidebar a.message:hover {
    cursor: help;
    background-color: #ff0000;
    color: #ffffff !important;
    text-decoration: none !important;
}

a:hover {
    text-decoration: underline !important;
    color: blue !important;
}

65
examples/planet.xslt Normal file
View File

@ -0,0 +1,65 @@
<!-- planet.xslt: transforms an aggregated Atom feed into an XHTML page.
     Entries are grouped by day (the date heading is emitted only for the
     first entry of each day), and a sidebar lists the subscriptions taken
     from the planet: extension elements. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
                xmlns:atom="http://www.w3.org/2005/Atom"
                xmlns:planet="http://planet.intertwingly.net/"
                xmlns="http://www.w3.org/1999/xhtml">

  <!-- Whole-page template: XHTML shell, the entries, then the sidebar. -->
  <xsl:template match="atom:feed">
    <html xmlns="http://www.w3.org/1999/xhtml">
      <head>
        <link rel="stylesheet" href="planet.css" type="text/css" />
        <title><xsl:value-of select="atom:title"/></title>
      </head>
      <body>
        <h1><xsl:value-of select="atom:title"/></h1>
        <xsl:apply-templates select="atom:entry"/>
        <div class="sidebar">
          <img src="images/logo.png" width="136" height="136" alt=""/>
          <h2>Subscriptions</h2>
          <ul>
            <!-- One list item per subscription, sorted by name; the feed
                 icon links to the subscription's self link. -->
            <xsl:for-each select="planet:subscription">
              <xsl:sort select="planet:name"/>
              <li>
                <a href="{atom:link[@rel='self']/@href}" title="subscribe">
                  <img src="images/feed-icon-10x10.png" alt="(feed)"/>
                </a>
                <xsl:value-of select="planet:name"/>
              </li>
            </xsl:for-each>
          </ul>
        </div>
      </body>
    </html>
  </xsl:template>

  <!-- Per-entry template: date heading (first entry of the day only),
       source/title links, then the content (summary as fallback). -->
  <xsl:template match="atom:entry">
    <xsl:variable name="date" select="substring(atom:updated,1,10)"/>
    <!-- Emit the date heading only when no earlier sibling entry shares
         the same yyyy-mm-dd prefix of atom:updated. -->
    <xsl:if test="not(preceding-sibling::atom:entry
        [substring(atom:updated,1,10) = $date])">
      <h2 class="date"><xsl:value-of select="$date"/></h2>
    </xsl:if>
    <h3>
      <a href="{atom:source/atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:source/planet:name"/>
      </a>
      &#x2014;
      <a href="{atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:title"/>
      </a>
    </h3>
    <div class="content">
      <xsl:choose>
        <xsl:when test="atom:content">
          <p><xsl:copy-of select="atom:content/*"/></p>
        </xsl:when>
        <xsl:otherwise>
          <p><xsl:copy-of select="atom:summary/*"/></p>
        </xsl:otherwise>
      </xsl:choose>
    </div>
  </xsl:template>
</xsl:stylesheet>

1824
planet/BeautifulSoup.py Normal file

File diff suppressed because it is too large Load Diff

45
planet/__init__.py Normal file
View File

@ -0,0 +1,45 @@
# Module-level cache for the shared planet logger.
logger = None

def getLogger(level):
    """Get (and cache) the shared "planet.runner" logger.

    On the first call the logging system is configured and the root
    logger's threshold is set from *level* (a level name such as
    "DEBUG" or "WARNING"); subsequent calls return the cached logger
    unchanged.
    """
    global logger
    if logger:
        return logger

    try:
        import logging
    except ImportError:
        # Pre-2.3 Pythons lack the logging package; use the bundled
        # compatibility module instead.  (The original caught all
        # exceptions here, which would also mask real errors.)
        import compat_logging as logging

    logging.basicConfig()
    logging.getLogger().setLevel(logging.getLevelName(level))
    logger = logging.getLogger("planet.runner")

    try:
        logger.warning
    except AttributeError:
        # Very old logging modules only provide warn(); alias it so
        # callers can always use warning().
        logger.warning = logger.warn

    return logger
def setTimeout(timeout):
    """Set a global socket timeout so ultra-slow servers time out
    rather than hang forever.

    *timeout* may be a number or a numeric string; falsy or
    unparseable values leave the socket defaults untouched (a warning
    is logged for unparseable ones).
    """
    if timeout:
        try:
            timeout = float(timeout)
        except (TypeError, ValueError):
            # Narrowed from a bare except: only conversion failures
            # should be treated as "invalid value".
            logger.warning("Timeout set to invalid value '%s', skipping", timeout)
            timeout = None

    if timeout:
        try:
            # Prefer the bundled timeoutsocket module when available.
            from planet import timeoutsocket
            timeoutsocket.setDefaultSocketTimeout(timeout)
            logger.debug("Socket timeout set to %d seconds", timeout)
        except ImportError:
            # Fall back to the stdlib hook (Python 2.3+).
            import socket
            if hasattr(socket, 'setdefaulttimeout'):
                logger.debug("timeoutsocket not found, using python function")
                socket.setdefaulttimeout(timeout)
                logger.debug("Socket timeout set to %d seconds", timeout)
            else:
                logger.error("Unable to set timeout to %d seconds", timeout)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,299 @@
# Copyright 2001-2002 by Vinay Sajip. All Rights Reserved.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of Vinay Sajip
# not be used in advertising or publicity pertaining to distribution
# of the software without specific, written prior permission.
# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""
Logging package for Python. Based on PEP 282 and comments thereto in
comp.lang.python, and influenced by Apache's log4j system.
Should work under Python versions >= 1.5.2, except that source line
information is not available unless 'inspect' is.
Copyright (C) 2001-2002 Vinay Sajip. All Rights Reserved.
To use, simply 'import logging' and log away!
"""
# Python 2-era imports ('thread', 'string' functions, SocketServer).
# NOTE(review): listen() below references types.TupleType, but 'types' is
# not imported here -- looks like an upstream omission; confirm before use.
import sys, logging, logging.handlers, string, thread, threading, socket, struct, os
from SocketServer import ThreadingTCPServer, StreamRequestHandler

# Default port on which listen() accepts new logging configurations.
DEFAULT_LOGGING_CONFIG_PORT = 9030

# Platform-specific "connection reset by peer" error code.
if sys.platform == "win32":
    RESET_ERROR = 10054     #WSAECONNRESET
else:
    RESET_ERROR = 104       #ECONNRESET

#
# The following code implements a socket listener for on-the-fly
# reconfiguration of logging.
#
# _listener holds the server object doing the listening
_listener = None
def fileConfig(fname, defaults=None):
    """
    Read the logging configuration from a ConfigParser-format file.
    This can be called several times from an application, allowing an end user
    the ability to select from various pre-canned configurations (if the
    developer provides a mechanism to present the choices and load the chosen
    configuration).
    In versions of ConfigParser which have the readfp method [typically
    shipped in 2.x versions of Python], you can pass in a file-like object
    rather than a filename, in which case the file-like object will be read
    using readfp.
    """
    import ConfigParser

    cp = ConfigParser.ConfigParser(defaults)
    # Accept either a filename or a file-like object (when readfp exists).
    if hasattr(cp, 'readfp') and hasattr(fname, 'readline'):
        cp.readfp(fname)
    else:
        cp.read(fname)
    #first, do the formatters...
    flist = cp.get("formatters", "keys")
    if len(flist):
        flist = string.split(flist, ",")
        formatters = {}
        for form in flist:
            sectname = "formatter_%s" % form
            opts = cp.options(sectname)
            if "format" in opts:
                # The trailing 1 means "raw": suppress %-interpolation.
                fs = cp.get(sectname, "format", 1)
            else:
                fs = None
            if "datefmt" in opts:
                dfs = cp.get(sectname, "datefmt", 1)
            else:
                dfs = None
            f = logging.Formatter(fs, dfs)
            formatters[form] = f
    #next, do the handlers...
    #critical section...
    logging._acquireLock()
    try:
        try:
            #first, lose the existing handlers...
            logging._handlers.clear()
            #now set up the new ones...
            hlist = cp.get("handlers", "keys")
            if len(hlist):
                hlist = string.split(hlist, ",")
                handlers = {}
                fixups = [] #for inter-handler references
                for hand in hlist:
                    sectname = "handler_%s" % hand
                    klass = cp.get(sectname, "class")
                    opts = cp.options(sectname)
                    if "formatter" in opts:
                        fmt = cp.get(sectname, "formatter")
                    else:
                        fmt = ""
                    # NOTE(review): the handler class and its constructor
                    # args come straight from the config file and are
                    # eval()'d -- only load trusted configuration files.
                    klass = eval(klass, vars(logging))
                    args = cp.get(sectname, "args")
                    args = eval(args, vars(logging))
                    h = apply(klass, args)
                    if "level" in opts:
                        level = cp.get(sectname, "level")
                        h.setLevel(logging._levelNames[level])
                    if len(fmt):
                        h.setFormatter(formatters[fmt])
                    #temporary hack for FileHandler and MemoryHandler.
                    if klass == logging.handlers.MemoryHandler:
                        if "target" in opts:
                            target = cp.get(sectname,"target")
                        else:
                            target = ""
                        if len(target): #the target handler may not be loaded yet, so keep for later...
                            fixups.append((h, target))
                    handlers[hand] = h
                #now all handlers are loaded, fixup inter-handler references...
                for fixup in fixups:
                    h = fixup[0]
                    t = fixup[1]
                    h.setTarget(handlers[t])
            #at last, the loggers...first the root...
            llist = cp.get("loggers", "keys")
            llist = string.split(llist, ",")
            llist.remove("root")
            sectname = "logger_root"
            root = logging.root
            log = root
            opts = cp.options(sectname)
            if "level" in opts:
                level = cp.get(sectname, "level")
                log.setLevel(logging._levelNames[level])
            for h in root.handlers[:]:
                root.removeHandler(h)
            hlist = cp.get(sectname, "handlers")
            if len(hlist):
                hlist = string.split(hlist, ",")
                for hand in hlist:
                    log.addHandler(handlers[hand])
            #and now the others...
            #we don't want to lose the existing loggers,
            #since other threads may have pointers to them.
            #existing is set to contain all existing loggers,
            #and as we go through the new configuration we
            #remove any which are configured. At the end,
            #what's left in existing is the set of loggers
            #which were in the previous configuration but
            #which are not in the new configuration.
            existing = root.manager.loggerDict.keys()
            #now set up the new ones...
            for log in llist:
                sectname = "logger_%s" % log
                qn = cp.get(sectname, "qualname")
                opts = cp.options(sectname)
                if "propagate" in opts:
                    propagate = cp.getint(sectname, "propagate")
                else:
                    propagate = 1
                logger = logging.getLogger(qn)
                if qn in existing:
                    existing.remove(qn)
                if "level" in opts:
                    level = cp.get(sectname, "level")
                    logger.setLevel(logging._levelNames[level])
                for h in logger.handlers[:]:
                    logger.removeHandler(h)
                logger.propagate = propagate
                logger.disabled = 0
                hlist = cp.get(sectname, "handlers")
                if len(hlist):
                    hlist = string.split(hlist, ",")
                    for hand in hlist:
                        logger.addHandler(handlers[hand])
            #Disable any old loggers. There's no point deleting
            #them as other threads may continue to hold references
            #and by disabling them, you stop them doing any logging.
            for log in existing:
                root.manager.loggerDict[log].disabled = 1
        except:
            # Report any configuration error but never propagate it past
            # the lock release below.
            import traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
            del ei
    finally:
        logging._releaseLock()
def listen(port=DEFAULT_LOGGING_CONFIG_PORT):
    """
    Start up a socket server on the specified port, and listen for new
    configurations.
    These will be sent as a file suitable for processing by fileConfig().
    Returns a Thread object on which you can call start() to start the server,
    and which you can join() when appropriate. To stop the server, call
    stopListening().
    """
    if not thread:
        raise NotImplementedError, "listen() needs threading to work"

    class ConfigStreamHandler(StreamRequestHandler):
        """
        Handler for a logging configuration request.
        It expects a completely new logging configuration and uses fileConfig
        to install it.
        """
        def handle(self):
            """
            Handle a request.
            Each request is expected to be a 4-byte length,
            followed by the config file. Uses fileConfig() to do the
            grunt work.
            """
            import tempfile
            try:
                conn = self.connection
                # Read the big-endian 4-byte payload length first.
                chunk = conn.recv(4)
                if len(chunk) == 4:
                    slen = struct.unpack(">L", chunk)[0]
                    chunk = self.connection.recv(slen)
                    while len(chunk) < slen:
                        chunk = chunk + conn.recv(slen - len(chunk))
                    #Apply new configuration. We'd like to be able to
                    #create a StringIO and pass that in, but unfortunately
                    #1.5.2 ConfigParser does not support reading file
                    #objects, only actual files. So we create a temporary
                    #file and remove it later.
                    file = tempfile.mktemp(".ini")
                    f = open(file, "w")
                    f.write(chunk)
                    f.close()
                    fileConfig(file)
                    os.remove(file)
            except socket.error, e:
                # NOTE(review): 'types' is never imported in this module,
                # so types.TupleType here would raise NameError -- confirm
                # against upstream before relying on this error path.
                if type(e.args) != types.TupleType:
                    raise
                else:
                    # Ignore connection resets; re-raise everything else.
                    errcode = e.args[0]
                    if errcode != RESET_ERROR:
                        raise

    class ConfigSocketReceiver(ThreadingTCPServer):
        """
        A simple TCP socket-based logging config receiver.
        """
        allow_reuse_address = 1

        def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT,
                     handler=None):
            ThreadingTCPServer.__init__(self, (host, port), handler)
            logging._acquireLock()
            self.abort = 0
            logging._releaseLock()
            self.timeout = 1

        def serve_until_stopped(self):
            # Poll the listening socket so self.abort (set by
            # stopListening) is rechecked at least every self.timeout
            # seconds.
            import select
            abort = 0
            while not abort:
                rd, wr, ex = select.select([self.socket.fileno()],
                                           [], [],
                                           self.timeout)
                if rd:
                    self.handle_request()
                logging._acquireLock()
                abort = self.abort
                logging._releaseLock()

    def serve(rcvr, hdlr, port):
        # Thread body: publish the server in _listener, then serve.
        server = rcvr(port=port, handler=hdlr)
        global _listener
        logging._acquireLock()
        _listener = server
        logging._releaseLock()
        server.serve_until_stopped()

    return threading.Thread(target=serve,
                            args=(ConfigSocketReceiver,
                                  ConfigStreamHandler, port))
def stopListening():
    """
    Stop the listening server which was created with a call to listen().
    """
    global _listener
    if _listener:
        # Flip the abort flag under the logging lock; the serving thread
        # notices it on its next poll cycle (see serve_until_stopped).
        logging._acquireLock()
        _listener.abort = 1
        _listener = None
        logging._releaseLock()

View File

@ -0,0 +1,728 @@
# Copyright 2001-2002 by Vinay Sajip. All Rights Reserved.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of Vinay Sajip
# not be used in advertising or publicity pertaining to distribution
# of the software without specific, written prior permission.
# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""
Logging package for Python. Based on PEP 282 and comments thereto in
comp.lang.python, and influenced by Apache's log4j system.
Should work under Python versions >= 1.5.2, except that source line
information is not available unless 'inspect' is.
Copyright (C) 2001-2002 Vinay Sajip. All Rights Reserved.
To use, simply 'import logging' and log away!
"""
# Python 2-era imports (cPickle, string functions, SocketServer).
import sys, logging, socket, types, os, string, cPickle, struct, time
from SocketServer import ThreadingTCPServer, StreamRequestHandler

#
# Some constants...
#
# Default ports for the network handlers defined below.
DEFAULT_TCP_LOGGING_PORT = 9020
DEFAULT_UDP_LOGGING_PORT = 9021
DEFAULT_HTTP_LOGGING_PORT = 9022
DEFAULT_SOAP_LOGGING_PORT = 9023
# Standard syslog UDP port (RFC-assigned).
SYSLOG_UDP_PORT = 514
class RotatingFileHandler(logging.FileHandler):
    # File handler that rolls the log over once it approaches maxBytes,
    # keeping up to backupCount numbered backups (".1" is the newest).
    def __init__(self, filename, mode="a", maxBytes=0, backupCount=0):
        """
        Open the specified file and use it as the stream for logging.
        By default, the file grows indefinitely. You can specify particular
        values of maxBytes and backupCount to allow the file to rollover at
        a predetermined size.
        Rollover occurs whenever the current log file is nearly maxBytes in
        length. If backupCount is >= 1, the system will successively create
        new files with the same pathname as the base file, but with extensions
        ".1", ".2" etc. appended to it. For example, with a backupCount of 5
        and a base file name of "app.log", you would get "app.log",
        "app.log.1", "app.log.2", ... through to "app.log.5". The file being
        written to is always "app.log" - when it gets filled up, it is closed
        and renamed to "app.log.1", and if files "app.log.1", "app.log.2" etc.
        exist, then they are renamed to "app.log.2", "app.log.3" etc.
        respectively.
        If maxBytes is zero, rollover never occurs.
        """
        logging.FileHandler.__init__(self, filename, mode)
        self.maxBytes = maxBytes
        self.backupCount = backupCount
        if maxBytes > 0:
            # Rollover only makes sense when appending, so force "a".
            self.mode = "a"

    def doRollover(self):
        """
        Do a rollover, as described in __init__().
        """
        self.stream.close()
        if self.backupCount > 0:
            # Shift app.log.1 -> app.log.2 etc., newest last, dropping
            # whatever was at the backupCount limit.
            for i in range(self.backupCount - 1, 0, -1):
                sfn = "%s.%d" % (self.baseFilename, i)
                dfn = "%s.%d" % (self.baseFilename, i + 1)
                if os.path.exists(sfn):
                    #print "%s -> %s" % (sfn, dfn)
                    if os.path.exists(dfn):
                        os.remove(dfn)
                    os.rename(sfn, dfn)
            dfn = self.baseFilename + ".1"
            if os.path.exists(dfn):
                os.remove(dfn)
            os.rename(self.baseFilename, dfn)
            #print "%s -> %s" % (self.baseFilename, dfn)
        self.stream = open(self.baseFilename, "w")

    def emit(self, record):
        """
        Emit a record.
        Output the record to the file, catering for rollover as described
        in doRollover().
        """
        if self.maxBytes > 0:                   # are we rolling over?
            msg = "%s\n" % self.format(record)
            self.stream.seek(0, 2)  #due to non-posix-compliant Windows feature
            if self.stream.tell() + len(msg) >= self.maxBytes:
                self.doRollover()
        logging.FileHandler.emit(self, record)
class SocketHandler(logging.Handler):
    """
    A handler class which writes logging records, in pickle format, to
    a streaming socket. The socket is kept open across logging calls.
    If the peer resets it, an attempt is made to reconnect on the next call.
    The pickle which is sent is that of the LogRecord's attribute dictionary
    (__dict__), so that the receiver does not need to have the logging module
    installed in order to process the logging event.
    To unpickle the record at the receiving end into a LogRecord, use the
    makeLogRecord function.
    """
    def __init__(self, host, port):
        """
        Initializes the handler with a specific host address and port.

        The attribute 'closeOnError' is initialized to 0.  When set to a
        true value, a socket error causes the socket to be silently
        closed and then reopened on the next logging call (see
        handleError below).
        """
        logging.Handler.__init__(self)
        self.host = host
        self.port = port
        self.sock = None          # lazily created in emit()
        self.closeOnError = 0

    def makeSocket(self):
        """
        A factory method which allows subclasses to define the precise
        type of socket they want.
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect((self.host, self.port))
        return s

    def send(self, s):
        """
        Send a pickled string to the socket.
        This function allows for partial sends which can happen when the
        network is busy.
        """
        if hasattr(self.sock, "sendall"):
            self.sock.sendall(s)
        else:
            # Manual sendall loop for sockets without sendall (pre-2.2).
            sentsofar = 0
            left = len(s)
            while left > 0:
                sent = self.sock.send(s[sentsofar:])
                sentsofar = sentsofar + sent
                left = left - sent

    def makePickle(self, record):
        """
        Pickles the record in binary format with a length prefix, and
        returns it ready for transmission across the socket.
        """
        s = cPickle.dumps(record.__dict__, 1)
        #n = len(s)
        #slen = "%c%c" % ((n >> 8) & 0xFF, n & 0xFF)
        slen = struct.pack(">L", len(s))
        return slen + s

    def handleError(self, record):
        """
        Handle an error during logging.
        An error has occurred during logging. Most likely cause -
        connection lost. Close the socket so that we can retry on the
        next event.
        """
        if self.closeOnError and self.sock:
            self.sock.close()
            self.sock = None        #try to reconnect next time
        else:
            logging.Handler.handleError(self, record)

    def emit(self, record):
        """
        Emit a record.
        Pickles the record and writes it to the socket in binary format.
        If there is an error with the socket, silently drop the packet.
        If there was a problem with the socket, re-establishes the
        socket.
        """
        try:
            s = self.makePickle(record)
            if not self.sock:
                self.sock = self.makeSocket()
            self.send(s)
        except:
            self.handleError(record)

    def close(self):
        """
        Closes the socket.
        """
        if self.sock:
            self.sock.close()
            self.sock = None
class DatagramHandler(SocketHandler):
    """
    A handler class which writes logging records, in pickle format, to
    a datagram socket. The pickle which is sent is that of the LogRecord's
    attribute dictionary (__dict__), so that the receiver does not need to
    have the logging module installed in order to process the logging event.
    To unpickle the record at the receiving end into a LogRecord, use the
    makeLogRecord function.
    """
    def __init__(self, host, port):
        """
        Initializes the handler with a specific host address and port.
        """
        SocketHandler.__init__(self, host, port)
        self.closeOnError = 0

    def makeSocket(self):
        """
        The factory method of SocketHandler is here overridden to create
        a UDP socket (SOCK_DGRAM).
        """
        # No connect(): datagrams are addressed per-send in send() below.
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        return s

    def send(self, s):
        """
        Send a pickled string to a socket.
        This function no longer allows for partial sends which can happen
        when the network is busy - UDP does not guarantee delivery and
        can deliver packets out of sequence.
        """
        self.sock.sendto(s, (self.host, self.port))
class SysLogHandler(logging.Handler):
    """
    A handler class which sends formatted logging records to a syslog
    server. Based on Sam Rushing's syslog module:
    http://www.nightmare.com/squirl/python-ext/misc/syslog.py
    Contributed by Nicolas Untz (after which minor refactoring changes
    have been made).
    """
    # from <linux/sys/syslog.h>:
    # ======================================================================
    # priorities/facilities are encoded into a single 32-bit quantity, where
    # the bottom 3 bits are the priority (0-7) and the top 28 bits are the
    # facility (0-big number). Both the priorities and the facilities map
    # roughly one-to-one to strings in the syslogd(8) source code. This
    # mapping is included in this file.
    #
    # priorities (these are ordered)
    LOG_EMERG = 0       #  system is unusable
    LOG_ALERT = 1       #  action must be taken immediately
    LOG_CRIT = 2        #  critical conditions
    LOG_ERR = 3         #  error conditions
    LOG_WARNING = 4     #  warning conditions
    LOG_NOTICE = 5      #  normal but significant condition
    LOG_INFO = 6        #  informational
    LOG_DEBUG = 7       #  debug-level messages

    #  facility codes
    LOG_KERN = 0        #  kernel messages
    LOG_USER = 1        #  random user-level messages
    LOG_MAIL = 2        #  mail system
    LOG_DAEMON = 3      #  system daemons
    LOG_AUTH = 4        #  security/authorization messages
    LOG_SYSLOG = 5      #  messages generated internally by syslogd
    LOG_LPR = 6         #  line printer subsystem
    LOG_NEWS = 7        #  network news subsystem
    LOG_UUCP = 8        #  UUCP subsystem
    LOG_CRON = 9        #  clock daemon
    LOG_AUTHPRIV = 10   #  security/authorization messages (private)

    #  other codes through 15 reserved for system use
    LOG_LOCAL0 = 16     #  reserved for local use
    LOG_LOCAL1 = 17     #  reserved for local use
    LOG_LOCAL2 = 18     #  reserved for local use
    LOG_LOCAL3 = 19     #  reserved for local use
    LOG_LOCAL4 = 20     #  reserved for local use
    LOG_LOCAL5 = 21     #  reserved for local use
    LOG_LOCAL6 = 22     #  reserved for local use
    LOG_LOCAL7 = 23     #  reserved for local use

    # Name -> code maps used by encodePriority() for string arguments.
    priority_names = {
        "alert":    LOG_ALERT,
        "crit":     LOG_CRIT,
        "critical": LOG_CRIT,
        "debug":    LOG_DEBUG,
        "emerg":    LOG_EMERG,
        "err":      LOG_ERR,
        "error":    LOG_ERR,        #  DEPRECATED
        "info":     LOG_INFO,
        "notice":   LOG_NOTICE,
        "panic":    LOG_EMERG,      #  DEPRECATED
        "warn":     LOG_WARNING,    #  DEPRECATED
        "warning":  LOG_WARNING,
        }

    facility_names = {
        "auth":     LOG_AUTH,
        "authpriv": LOG_AUTHPRIV,
        "cron":     LOG_CRON,
        "daemon":   LOG_DAEMON,
        "kern":     LOG_KERN,
        "lpr":      LOG_LPR,
        "mail":     LOG_MAIL,
        "news":     LOG_NEWS,
        "security": LOG_AUTH,       #  DEPRECATED
        "syslog":   LOG_SYSLOG,
        "user":     LOG_USER,
        "uucp":     LOG_UUCP,
        "local0":   LOG_LOCAL0,
        "local1":   LOG_LOCAL1,
        "local2":   LOG_LOCAL2,
        "local3":   LOG_LOCAL3,
        "local4":   LOG_LOCAL4,
        "local5":   LOG_LOCAL5,
        "local6":   LOG_LOCAL6,
        "local7":   LOG_LOCAL7,
        }

    def __init__(self, address=('localhost', SYSLOG_UDP_PORT), facility=LOG_USER):
        """
        Initialize a handler.
        If address is specified as a string, UNIX socket is used.
        If facility is not specified, LOG_USER is used.
        """
        logging.Handler.__init__(self)
        self.address = address
        self.facility = facility
        if type(address) == types.StringType:
            self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
            # syslog may require either DGRAM or STREAM sockets
            try:
                self.socket.connect(address)
            except socket.error:
                # Retry with a stream socket before giving up.
                self.socket.close()
                self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
                self.socket.connect(address)
            self.unixsocket = 1
        else:
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.unixsocket = 0
        self.formatter = None

    # curious: when talking to the unix-domain '/dev/log' socket, a
    # zero-terminator seems to be required.  this string is placed
    # into a class variable so that it can be overridden if
    # necessary.
    log_format_string = '<%d>%s\000'

    def encodePriority (self, facility, priority):
        """
        Encode the facility and priority. You can pass in strings or
        integers - if strings are passed, the facility_names and
        priority_names mapping dictionaries are used to convert them to
        integers.
        """
        if type(facility) == types.StringType:
            facility = self.facility_names[facility]
        if type(priority) == types.StringType:
            priority = self.priority_names[priority]
        return (facility << 3) | priority

    def close (self):
        """
        Closes the socket.
        """
        # NOTE(review): only the unix-domain socket is closed here; the
        # INET UDP socket created in __init__ is left open.
        if self.unixsocket:
            self.socket.close()

    def emit(self, record):
        """
        Emit a record.
        The record is formatted, and then sent to the syslog server. If
        exception information is present, it is NOT sent to the server.
        """
        msg = self.format(record)
        """
        We need to convert record level to lowercase, maybe this will
        change in the future.
        """
        msg = self.log_format_string % (
            self.encodePriority(self.facility,
                                string.lower(record.levelname)),
            msg)
        try:
            if self.unixsocket:
                self.socket.send(msg)
            else:
                self.socket.sendto(msg, self.address)
        except:
            self.handleError(record)
class SMTPHandler(logging.Handler):
    """
    A handler class which sends an SMTP email for each logging event.
    """
    def __init__(self, mailhost, fromaddr, toaddrs, subject):
        """
        Initialize the handler.
        Initialize the instance with the from and to addresses and subject
        line of the email. To specify a non-standard SMTP port, use the
        (host, port) tuple format for the mailhost argument.
        """
        logging.Handler.__init__(self)
        if type(mailhost) == types.TupleType:
            host, port = mailhost
            self.mailhost = host
            self.mailport = port
        else:
            self.mailhost = mailhost
            self.mailport = None    # emit() falls back to smtplib.SMTP_PORT
        self.fromaddr = fromaddr
        if type(toaddrs) == types.StringType:
            # Allow a single recipient to be given as a bare string.
            toaddrs = [toaddrs]
        self.toaddrs = toaddrs
        self.subject = subject

    def getSubject(self, record):
        """
        Determine the subject for the email.
        If you want to specify a subject line which is record-dependent,
        override this method.
        """
        return self.subject

    # Name tables used by date_time() to build an RFC-style date header.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def date_time(self):
        """Return the current date and time formatted for a MIME header."""
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(time.time())
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def emit(self, record):
        """
        Emit a record.
        Format the record and send it to the specified addressees.
        """
        try:
            import smtplib
            port = self.mailport
            if not port:
                port = smtplib.SMTP_PORT
            smtp = smtplib.SMTP(self.mailhost, port)
            msg = self.format(record)
            msg = "From: %s\r\nTo: %s\r\nSubject: %s\r\nDate: %s\r\n\r\n%s" % (
                            self.fromaddr,
                            string.join(self.toaddrs, ","),
                            self.getSubject(record),
                            self.date_time(), msg)
            smtp.sendmail(self.fromaddr, self.toaddrs, msg)
            smtp.quit()
        except:
            self.handleError(record)
class NTEventLogHandler(logging.Handler):
    """
    A handler class which sends events to the NT Event Log. Adds a
    registry entry for the specified application name. If no dllname is
    provided, win32service.pyd (which contains some basic message
    placeholders) is used. Note that use of these placeholders will make
    your event logs big, as the entire message source is held in the log.
    If you want slimmer logs, you have to pass in the name of your own DLL
    which contains the message definitions you want to use in the event log.
    """
    def __init__(self, appname, dllname=None, logtype="Application"):
        logging.Handler.__init__(self)
        try:
            import win32evtlogutil, win32evtlog
            self.appname = appname
            self._welu = win32evtlogutil
            if not dllname:
                # Default to win32service.pyd, two directories up from
                # win32evtlogutil's own location.
                dllname = os.path.split(self._welu.__file__)
                dllname = os.path.split(dllname[0])
                dllname = os.path.join(dllname[0], r'win32service.pyd')
            self.dllname = dllname
            self.logtype = logtype
            self._welu.AddSourceToRegistry(appname, dllname, logtype)
            self.deftype = win32evtlog.EVENTLOG_ERROR_TYPE
            # Map logging levels to NT event types; see getEventType().
            self.typemap = {
                logging.DEBUG   : win32evtlog.EVENTLOG_INFORMATION_TYPE,
                logging.INFO    : win32evtlog.EVENTLOG_INFORMATION_TYPE,
                logging.WARNING : win32evtlog.EVENTLOG_WARNING_TYPE,
                logging.ERROR   : win32evtlog.EVENTLOG_ERROR_TYPE,
                logging.CRITICAL: win32evtlog.EVENTLOG_ERROR_TYPE,
         }
        except ImportError:
            # _welu stays None and emit() becomes a no-op.
            print "The Python Win32 extensions for NT (service, event "\
                        "logging) appear not to be available."
            self._welu = None

    def getMessageID(self, record):
        """
        Return the message ID for the event record. If you are using your
        own messages, you could do this by having the msg passed to the
        logger being an ID rather than a formatting string. Then, in here,
        you could use a dictionary lookup to get the message ID. This
        version returns 1, which is the base message ID in win32service.pyd.
        """
        return 1

    def getEventCategory(self, record):
        """
        Return the event category for the record.
        Override this if you want to specify your own categories. This version
        returns 0.
        """
        return 0

    def getEventType(self, record):
        """
        Return the event type for the record.
        Override this if you want to specify your own types. This version does
        a mapping using the handler's typemap attribute, which is set up in
        __init__() to a dictionary which contains mappings for DEBUG, INFO,
        WARNING, ERROR and CRITICAL. If you are using your own levels you will
        either need to override this method or place a suitable dictionary in
        the handler's typemap attribute.
        """
        return self.typemap.get(record.levelno, self.deftype)

    def emit(self, record):
        """
        Emit a record.
        Determine the message ID, event category and event type. Then
        log the message in the NT event log.
        """
        if self._welu:
            try:
                id = self.getMessageID(record)
                cat = self.getEventCategory(record)
                type = self.getEventType(record)
                msg = self.format(record)
                self._welu.ReportEvent(self.appname, id, cat, type, [msg])
            except:
                self.handleError(record)

    def close(self):
        """
        Clean up this handler.
        You can remove the application name from the registry as a
        source of event log entries. However, if you do this, you will
        not be able to see the events as you intended in the Event Log
        Viewer - it needs to be able to access the registry to get the
        DLL name.
        """
        #self._welu.RemoveSourceFromRegistry(self.appname, self.logtype)
        pass
class HTTPHandler(logging.Handler):
    """
    A class which sends records to a Web server, using either GET or
    POST semantics.
    """
    def __init__(self, host, url, method="GET"):
        """
        Initialize the instance with the host, the request URL, and the method
        ("GET" or "POST")
        """
        logging.Handler.__init__(self)
        method = string.upper(method)
        if method not in ["GET", "POST"]:
            raise ValueError, "method must be GET or POST"
        self.host = host
        self.url = url
        self.method = method

    def mapLogRecord(self, record):
        """
        Default implementation of mapping the log record into a dict
        that is send as the CGI data. Overwrite in your class.
        Contributed by Franz Glasner.
        """
        return record.__dict__

    def emit(self, record):
        """
        Emit a record.
        Send the record to the Web server as an URL-encoded dictionary
        """
        try:
            import httplib, urllib
            h = httplib.HTTP(self.host)
            url = self.url
            data = urllib.urlencode(self.mapLogRecord(record))
            if self.method == "GET":
                # Append the record data to the query string, respecting
                # any query already present in the URL.
                if (string.find(url, '?') >= 0):
                    sep = '&'
                else:
                    sep = '?'
                url = url + "%c%s" % (sep, data)
            h.putrequest(self.method, url)
            if self.method == "POST":
                h.putheader("Content-length", str(len(data)))
            h.endheaders()
            if self.method == "POST":
                h.send(data)
            h.getreply()    #can't do anything with the result
        except:
            self.handleError(record)
class BufferingHandler(logging.Handler):
    """
    A handler class which buffers logging records in memory. Whenever each
    record is added to the buffer, a check is made to see if the buffer should
    be flushed. If it should, then flush() is expected to do what's needed.
    """
    def __init__(self, capacity):
        """
        Initialize the handler with the buffer size.
        """
        logging.Handler.__init__(self)
        self.capacity = capacity
        self.buffer = []

    def shouldFlush(self, record):
        """
        Should the handler flush its buffer?

        Returns true once the buffer has reached capacity. Override this
        method to implement custom flushing strategies.
        """
        return len(self.buffer) >= self.capacity

    def emit(self, record):
        """
        Emit a record.

        Append the record; when shouldFlush() says so, call flush() to
        process the buffer.
        """
        self.buffer.append(record)
        if self.shouldFlush(record):
            self.flush()

    def flush(self):
        """
        Override to implement custom flushing behaviour.

        This version just zaps the buffer to empty.
        """
        self.buffer = []
class MemoryHandler(BufferingHandler):
    """
    A handler class which buffers logging records in memory, periodically
    flushing them to a target handler. Flushing occurs whenever the buffer
    is full, or when an event of a certain severity or greater is seen.
    """
    def __init__(self, capacity, flushLevel=logging.ERROR, target=None):
        """
        Initialize the handler with the buffer size, the level at which
        flushing should occur and an optional target.

        Note that without a target being set either here or via setTarget(),
        a MemoryHandler is no use to anyone!
        """
        BufferingHandler.__init__(self, capacity)
        self.flushLevel = flushLevel
        self.target = target

    def shouldFlush(self, record):
        """
        Check for buffer full or a record at the flushLevel or higher.
        """
        # capacity is checked first, exactly as in BufferingHandler
        if len(self.buffer) >= self.capacity:
            return True
        return record.levelno >= self.flushLevel

    def setTarget(self, target):
        """
        Set the target handler for this handler.
        """
        self.target = target

    def flush(self):
        """
        For a MemoryHandler, flushing means just sending the buffered
        records to the target, if there is one. Override if you want
        different behaviour.
        """
        if self.target:
            for buffered in self.buffer:
                self.target.handle(buffered)
            self.buffer = []

    def close(self):
        """
        Flush, set the target to None and lose the buffer.
        """
        self.flush()
        self.target = None
        self.buffer = []

112
planet/config.py Normal file
View File

@ -0,0 +1,112 @@
"""
Planet Configuration
This module encapsulates all planet configuration. This is not a generic
configuration parser, it knows everything about configuring a planet - from
the structure of the ini file, to knowledge of data types, even down to
what are the defaults.
Usage:
from planet import config
config.load('config.ini')
# administrative / structural information
print config.templates()
print config.feeds()
# planet wide configuration
print config.name()
print config.link()
# per template configuration
print config.days_per_page('atom.xml.tmpl')
print config.encoding('index.html.tmpl')
Todo:
* error handling (example: no planet section)
"""
import sys
from ConfigParser import ConfigParser
parser = ConfigParser()
planet_predefined_options = []
def __init__():
    """define the structure of an ini file"""
    from planet import config

    def get(section, option, default):
        # look in the named section first, fall back to [Planet],
        # and finally to the hard-coded default
        if section and parser.has_option(section, option):
            return parser.get(section, option)
        elif parser.has_option('Planet', option):
            return parser.get('Planet', option)
        else:
            return default

    def define_planet(name, default):
        # planet-wide option: installs a zero-argument accessor on the
        # config module (e.g. config.name())
        setattr(config, name, lambda default=default: get(None,name,default))
        planet_predefined_options.append(name)

    def define_tmpl(name, default):
        # per-template option: accessor takes the template section name
        setattr(config, name, lambda section, default=default:
            get(section,name,default))

    def define_tmpl_int(name, default):
        # per-template option, coerced to int on every access
        setattr(config, name, lambda section, default=default:
            int(get(section,name,default)))

    # planet wide options
    define_planet('name', "Unconfigured Planet")
    define_planet('link', "Unconfigured Planet")
    define_planet('cache_directory', "cache")
    define_planet('log_level', "WARNING")
    define_planet('feed_timeout', 20)

    # template options
    define_tmpl_int('days_per_page', 0)
    define_tmpl_int('items_per_page', 60)
    define_tmpl('encoding', 'utf-8')

    # prevent re-initialization: subsequent calls become no-ops
    setattr(config, '__init__', lambda: None)
def load(file):
    """ initialize and load a configuration"""
    # idempotent: __init__ replaces itself with a no-op after the first call
    __init__()
    global parser
    parser = ConfigParser()
    parser.read(file)
def template_files():
    """ list the templates defined """
    # template_files is a single space-separated value in [Planet]
    value = parser.get('Planet', 'template_files')
    return value.split(' ')
def feeds():
    """ list the feeds defined """
    # every section that is neither [Planet] nor a template is a feed
    templates = template_files()
    return [section for section in parser.sections()
            if section != 'Planet' and section not in templates]
def planet_options():
    """ dictionary of planet wide options"""
    return dict([(option, parser.get('Planet', option))
                 for option in parser.options('Planet')])
def feed_options(section):
    """ dictionary of feed specific options"""
    from planet import config
    # start with the planet-wide options, minus the predefined ones
    options = dict([(key,value) for key,value in planet_options().items()
        if key not in planet_predefined_options])
    # feed-specific options override the planet-wide values
    if parser.has_section(section):
        options.update(dict(map(lambda opt: (opt, parser.get(section,opt)),
            parser.options(section))))
    return options
def template_options(section):
    """ dictionary of template specific options"""
    # template sections are resolved exactly like feed sections
    return feed_options(section)
def write(file=sys.stdout):
    """ write out an updated template """
    # ConfigParser.write() returns None; the previous code printed that
    # return value, appending a spurious "None" line to stdout.
    parser.write(file)

3656
planet/feedparser.py Executable file

File diff suppressed because it is too large Load Diff

195
planet/reconstitute.py Normal file
View File

@ -0,0 +1,195 @@
"""
Reconstitute an entry document from the output of the Universal Feed Parser.
The main entry point is called 'reconstitute'. Input parameters are:
results: this is the entire hash table return by the UFP
entry: this is the entry in the hash that you want reconstituted
The value returned is an XML DOM. Every effort is made to convert
everything to unicode, and text fields into either plain text or
well formed XHTML.
Todo:
* extension elements
"""
import re, time, md5, sgmllib
from xml.sax.saxutils import escape
from xml.dom import minidom
from BeautifulSoup import BeautifulSoup
from xml.parsers.expat import ExpatError
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")

def createTextElement(parent, name, value):
    """ utility function to create a child element with the specified text"""
    # silently skip empty/missing values: no element is created
    if not value:
        return
    document = parent.ownerDocument
    child = document.createElement(name)
    child.appendChild(document.createTextNode(value))
    parent.appendChild(child)
def invalidate(c):
    """ replace invalid characters """
    # emit the U+FFFD replacement character (utf-8 encoded), wrapped in an
    # acronym element that records the code point of the offending character
    codepoint = ord(c.group(0))
    return '<acronym title="U+%04x">\xef\xbf\xbd</acronym>' % codepoint
def ncr2c(value):
    """ convert numeric character references to characters """
    # value is a regex match whose group(1) is the reference body,
    # e.g. '8212' or 'x2014'
    value=value.group(1)
    if value.startswith('x'):
        # hexadecimal form: &#x2014;
        value=unichr(int(value[1:],16))
    else:
        # decimal form: &#8212;
        value=unichr(int(value))
    return value
def normalize(text, bozo):
    """ convert everything to well formed XML """
    # 'text' is a feedparser detail dict (value/type); 'bozo' is the
    # feed-level well-formedness flag from the parse results
    if text.has_key('type'):
        if text.type.lower().find('html')<0:
            # plain text: escape markup characters and promote to html
            text['value'] = escape(text.value)
            text['type'] = 'text/html'
        if text.type.lower() == 'text/html' or bozo:
            # tidy with BeautifulSoup: balances tags and converts
            # named entities to characters
            dom=BeautifulSoup(text.value,convertEntities="html")
            for tag in dom.findAll(True):
                for attr,value in tag.attrs:
                    # numeric character references -> characters
                    value=sgmllib.charref.sub(ncr2c,value)
                    # characters illegal in XML -> U+FFFD
                    value=illegal_xml_chars.sub(u'\uFFFD',value)
                    tag[attr]=value
            # in text content, mark illegal characters visibly
            text['value'] = illegal_xml_chars.sub(invalidate, str(dom))
    return text
def id(xentry, entry):
    """ copy or compute an id for the entry """
    # NOTE: shadows the builtin id() within this module
    if entry.has_key("id"):
        entry_id = entry.id
    elif entry.has_key("link"):
        entry_id = entry.link
    elif entry.has_key("title"):
        # no natural id: synthesize one from an md5 digest of the title,
        # qualified by the base URI the entry was found at
        entry_id = (entry.title_detail.base + "/" +
            md5.new(entry.title).hexdigest())
    elif entry.has_key("summary"):
        entry_id = (entry.summary_detail.base + "/" +
            md5.new(entry.summary).hexdigest())
    elif entry.has_key("content"):
        entry_id = (entry.content[0].base + "/" +
            md5.new(entry.content[0].value).hexdigest())
    else:
        # nothing at all to derive an id from
        return
    # xentry may be None when the caller only wants the computed value
    if xentry: createTextElement(xentry, 'id', entry_id)
    return entry_id
def links(xentry, entry):
    """ copy links to the entry """
    if not entry.has_key('links'):
        return
    document = xentry.ownerDocument
    for link in entry.links:
        element = document.createElement('link')
        element.setAttribute('type', link.type)
        element.setAttribute('href', link.href)
        element.setAttribute('rel', link.rel)
        xentry.appendChild(element)
def date(xentry, name, parsed):
    """ insert a date-formatted element into the entry """
    # 'parsed' is a time tuple (e.g. from feedparser's *_parsed fields)
    if not parsed:
        return
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsed)
    createTextElement(xentry, name, stamp)
def author(xentry, name, detail):
    """ insert an author-like element into the entry """
    # 'detail' is a feedparser author_detail-style dict
    if not detail:
        return
    document = xentry.ownerDocument
    element = document.createElement(name)
    # child element name -> key in the detail dict
    for child, key in (('name', 'name'), ('email', 'email'), ('uri', 'href')):
        createTextElement(element, child, detail.get(key, None))
    xentry.appendChild(element)
def content(xentry, name, detail, bozo):
    """ insert a content-like element into the entry """
    if not detail or not detail.value: return
    # normalize mutates 'detail' in place to well-formed html
    normalize(detail, bozo)
    xdoc = xentry.ownerDocument
    xcontent = xdoc.createElement(name)
    try:
        # see if the resulting text is a well-formed XML fragment
        div = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
        if isinstance(detail.value,unicode):
            detail.value=detail.value.encode('utf-8')
        data = minidom.parseString(div % detail.value).documentElement
        if detail.value.find('<') < 0:
            # no markup at all: emit the plain text node, untyped
            xcontent.appendChild(data.firstChild)
        else:
            # markup present: keep the wrapping div and mark as xhtml
            xcontent.setAttribute('type', 'xhtml')
            xcontent.appendChild(data)
    except ExpatError:
        # leave as html
        xcontent.setAttribute('type', 'html')
        xcontent.appendChild(xdoc.createTextNode(detail.value.decode('utf-8')))
    if detail.language:
        xcontent.setAttribute('xml:lang', detail.language)
    xentry.appendChild(xcontent)
def source(xentry, source, bozo):
    """ copy source information to the entry """
    # NOTE: the parameter shadows the function name, so this function
    # cannot recurse -- intentional, it never needs to
    xdoc = xentry.ownerDocument
    xsource = xdoc.createElement('source')
    createTextElement(xsource, 'id', source.get('id', None))
    createTextElement(xsource, 'icon', source.get('icon', None))
    createTextElement(xsource, 'logo', source.get('logo', None))
    author(xsource, 'author', source.get('author_detail',None))
    for contributor in source.get('contributors',[]):
        author(xsource, 'contributor', contributor)
    links(xsource, source)
    content(xsource, 'rights', source.get('rights_detail',None), bozo)
    content(xsource, 'subtitle', source.get('subtitle_detail',None), bozo)
    content(xsource, 'title', source.get('title_detail',None), bozo)
    date(xsource, 'updated', source.get('updated_parsed',None))
    # propagate planet inserted information
    for key, value in source.items():
        if key.startswith('planet:'):
            createTextElement(xsource, key, value)
    xentry.appendChild(xsource)
def reconstitute(feed, entry):
    """ create an entry document from a parsed feed """
    # 'feed' is the complete UFP result hash; 'entry' one of its entries.
    # Returns a minidom document rooted at an Atom <entry>.
    xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
    xentry=xdoc.documentElement
    xentry.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
    id(xentry, entry)
    links(xentry, entry)

    bozo = feed.bozo
    content(xentry, 'title', entry.get('title_detail',None), bozo)
    content(xentry, 'summary', entry.get('summary_detail',None), bozo)
    content(xentry, 'content', entry.get('content',[None])[0], bozo)
    content(xentry, 'rights', entry.get('rights_detail',None), bozo)

    # an entry with no updated date gets stamped "now"
    date(xentry, 'updated', entry.get('updated_parsed',time.gmtime()))
    date(xentry, 'published', entry.get('published_parsed',None))

    author(xentry, 'author', entry.get('author_detail',None))
    for contributor in entry.get('contributors',[]):
        author(xentry, 'contributor', contributor)

    # fall back to the feed itself as the source
    source(xentry, entry.get('source', feed.feed), bozo)
    return xdoc

86
planet/spider.py Normal file
View File

@ -0,0 +1,86 @@
"""
Fetch either a single feed, or a set of feeds, normalize to Atom and XHTML,
and write each as a set of entries in a cache directory.
"""
from planet import config, feedparser, reconstitute
import time, calendar, re, os
try:
from xml.dom.ext import PrettyPrint
except:
PrettyPrint = None
# Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^[^:]*://')
re_slash = re.compile(r'[?/]+')
re_initial_cruft = re.compile(r'^[,.]*')
re_final_cruft = re.compile(r'[,.]*$')

def filename(directory, filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    try:
        if re_url_scheme.match(filename):
            # best effort IDNA-encode the name; any failure is ignored
            if isinstance(filename, str):
                filename = filename.decode('utf-8').encode('idna')
            else:
                filename = filename.encode('idna')
    except:
        pass

    # apply the sanitising substitutions in order: drop the scheme,
    # collapse slashes/query markers to commas, trim leading and
    # trailing punctuation
    for pattern, replacement in ((re_url_scheme, ""),
                                 (re_slash, ","),
                                 (re_initial_cruft, ""),
                                 (re_final_cruft, "")):
        filename = pattern.sub(replacement, filename)
    return os.path.join(directory, filename)
def spiderFeed(feed):
    """ Spider (fetch) a single feed """
    data = feedparser.parse(feed)
    cache = config.cache_directory()

    # capture data from the planet configuration file
    for name, value in config.feed_options(feed).items():
        data.feed['planet:'+name] = value

    for entry in data.entries:
        # ensure the entry has an id; skip entries for which not even
        # a synthetic one can be computed
        if not entry.has_key('id'):
            entry['id'] = reconstitute.id(None, entry)
        if not entry['id']: continue
        out = filename(cache, entry.id)

        # cache file mtime: prefer the entry's own timestamp, fall back
        # to the existing file's mtime, or "now" for brand new entries
        if entry.has_key('updated_parsed'):
            mtime = calendar.timegm(entry.updated_parsed)
        else:
            try:
                mtime = os.stat(out).st_mtime
            except:
                mtime = time.time()
            entry['updated_parsed'] = time.gmtime(mtime)

        # reconstitute exactly once and reuse the result (the previous
        # code discarded this value and reconstituted again in each
        # output branch, tripling the work per entry)
        xml = reconstitute.reconstitute(data, entry)
        output = open(out, 'w')
        try:
            if PrettyPrint:
                PrettyPrint(xml, output)
            else:
                output.write(xml.toxml('utf-8'))
        finally:
            # close even if serialization fails, so the handle is not leaked
            output.close()
        os.utime(out, (mtime, mtime))
def spiderPlanet(configFile):
    """ Spider (fetch) an entire planet """
    import planet
    config.load(configFile)
    log = planet.getLogger(config.log_level())
    # apply the configured timeout to all subsequent feed fetches
    planet.setTimeout(config.feed_timeout())
    for feed in config.feeds():
        log.info("Updating feed %s", feed)
        spiderFeed(feed)

46
planet/splice.py Normal file
View File

@ -0,0 +1,46 @@
""" Splice together a planet from a cache of feed entries """
import glob, os
from planet import config
from xml.dom import minidom
from reconstitute import createTextElement
def splice(configFile):
    """ Splice together a planet from a cache of entries """
    import planet
    config.load(configFile)
    # configures logging as a side effect; 'log' itself is unused here
    log = planet.getLogger(config.log_level())

    cache = config.cache_directory()
    # sort cached entry files newest-first by mtime
    # (NOTE: 'dir' and 'file' shadow builtins)
    dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")]
    dir.sort()
    dir.reverse()

    # take enough items to satisfy the largest template
    items=max([config.items_per_page(templ)
        for templ in config.template_files()])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())

    # insert entry information
    for mtime,file in dir[:items]:
        entry=minidom.parse(file)
        feed.appendChild(entry.documentElement)

    # insert subscription information
    feed.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
    for sub in config.feeds():
        name = config.feed_options(sub).get('name','')
        xsub = doc.createElement('planet:subscription')
        xlink = doc.createElement('link')
        xlink.setAttribute('rel','self')
        xlink.setAttribute('href',sub.decode('utf-8'))
        xsub.appendChild(xlink)
        xname = doc.createElement('planet:name')
        xname.appendChild(doc.createTextNode(name.decode('utf-8')))
        xsub.appendChild(xname)
        feed.appendChild(xsub)

    return doc

424
planet/timeoutsocket.py Normal file
View File

@ -0,0 +1,424 @@
####
# Copyright 2000,2001 by Timothy O'Malley <timo@alum.mit.edu>
#
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software
# and its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Timothy O'Malley not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
####
"""Timeout Socket
This module enables a timeout mechanism on all TCP connections. It
does this by inserting a shim into the socket module. After this module
has been imported, all socket creation goes through this shim. As a
result, every TCP connection will support a timeout.
The beauty of this method is that it immediately and transparently
enables the entire python library to support timeouts on TCP sockets.
As an example, if you wanted to SMTP connections to have a 20 second
timeout:
import timeoutsocket
import smtplib
timeoutsocket.setDefaultSocketTimeout(20)
The timeout applies to the socket functions that normally block on
execution: read, write, connect, and accept. If any of these
operations exceeds the specified timeout, the exception Timeout
will be raised.
The default timeout value is set to None. As a result, importing
this module does not change the default behavior of a socket. The
timeout mechanism only activates when the timeout has been set to
a numeric value. (This behavior mimics the behavior of the
select.select() function.)
This module implements two classes: TimeoutSocket and TimeoutFile.
The TimeoutSocket class defines a socket-like object that attempts to
avoid the condition where a socket may block indefinitely. The
TimeoutSocket class raises a Timeout exception whenever the
current operation delays too long.
The TimeoutFile class defines a file-like object that uses the TimeoutSocket
class. When the makefile() method of TimeoutSocket is called, it returns
an instance of a TimeoutFile.
Each of these objects adds two methods to manage the timeout value:
get_timeout() --> returns the timeout of the socket or file
set_timeout() --> sets the timeout of the socket or file
As an example, one might use the timeout feature to create httplib
connections that will timeout after 30 seconds:
import timeoutsocket
import httplib
H = httplib.HTTP("www.python.org")
H.sock.set_timeout(30)
Note: When used in this manner, the connect() routine may still
block because it happens before the timeout is set. To avoid
this, use the 'timeoutsocket.setDefaultSocketTimeout()' function.
Good Luck!
"""
__version__ = "$Revision: 1.1.1.1 $"
__author__ = "Timothy O'Malley <timo@alum.mit.edu>"
#
# Imports
#
import select, string
import socket
# Capture the real socket implementation before we shim it below.  If the
# shim was installed by an earlier import, the original lives in
# socket._no_timeoutsocket instead of socket.socket.
if not hasattr(socket, "_no_timeoutsocket"):
    _socket = socket.socket
else:
    _socket = socket._no_timeoutsocket
#
# Set up constants to test for Connected and Blocking operations.
# We delete 'os' and 'errno' to keep our namespace clean(er).
# Thanks to Alex Martelli and G. Li for the Windows error codes.
#
import os
if os.name == "nt":
    # Winsock error numbers (WSAEINVAL/WSAENOTSOCK, WSAEWOULDBLOCK, ...)
    _IsConnected = ( 10022, 10056 )
    _ConnectBusy = ( 10035, )
    _AcceptBusy = ( 10035, )
else:
    # POSIX errno values
    import errno
    _IsConnected = ( errno.EISCONN, )
    _ConnectBusy = ( errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK )
    _AcceptBusy = ( errno.EAGAIN, errno.EWOULDBLOCK )
    del errno
del os
#
# Default timeout value for ALL TimeoutSockets
#
_DefaultTimeout = None

def setDefaultSocketTimeout(timeout):
    """Set the timeout applied to every subsequently created TimeoutSocket."""
    global _DefaultTimeout
    _DefaultTimeout = timeout

def getDefaultSocketTimeout():
    """Return the timeout currently applied to new TimeoutSockets."""
    return _DefaultTimeout
#
# Exceptions for socket errors and timeouts
#
Error = socket.error

class Timeout(Exception):
    """Raised when a socket operation exceeds the configured timeout."""
#
# Factory function
#
from socket import AF_INET, SOCK_STREAM
def timeoutsocket(family=AF_INET, type=SOCK_STREAM, proto=None):
    """Create a socket; TCP (AF_INET/SOCK_STREAM) sockets are wrapped in
    a TimeoutSocket carrying the current default timeout."""
    if family == AF_INET and type == SOCK_STREAM:
        return TimeoutSocket( _socket(family, type), _DefaultTimeout )
    # anything other than a TCP socket is returned unwrapped
    if proto:
        return _socket(family, type, proto)
    return _socket(family, type)
# end timeoutsocket
#
# The TimeoutSocket class definition
#
class TimeoutSocket:
    """TimeoutSocket object
    Implements a socket-like object that raises Timeout whenever
    an operation takes too long.
    The definition of 'too long' can be changed using the
    set_timeout() method.
    """

    # number of outstanding makefile() copies; close() only really closes
    # the underlying socket once this reaches zero
    _copies = 0
    # mirrors the blocking mode of the wrapped socket
    _blocking = 1

    def __init__(self, sock, timeout):
        self._sock = sock
        self._timeout = timeout
    # end __init__

    def __getattr__(self, key):
        # delegate anything we don't override to the wrapped socket
        return getattr(self._sock, key)
    # end __getattr__

    def get_timeout(self):
        return self._timeout
    # end set_timeout

    def set_timeout(self, timeout=None):
        self._timeout = timeout
    # end set_timeout

    def setblocking(self, blocking):
        self._blocking = blocking
        return self._sock.setblocking(blocking)
    # end set_timeout

    def connect_ex(self, addr):
        # like socket.connect_ex: return an error number instead of raising
        errcode = 0
        try:
            self.connect(addr)
        except Error, why:
            errcode = why[0]
        return errcode
    # end connect_ex

    def connect(self, addr, port=None, dumbhack=None):
        # In case we were called as connect(host, port)
        if port != None: addr = (addr, port)

        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to connect
        try:
            sock.setblocking(0)
            sock.connect(addr)
            sock.setblocking(blocking)
            return
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not blocking, re-raise
            if not blocking:
                raise

            # If we are already connected, then return success.
            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if dumbhack and errcode in _IsConnected:
                return
            elif errcode not in _ConnectBusy:
                raise

        # Now, wait for the connect to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([], [sock], [], timeout)
            if w:
                # retry: on this second pass _IsConnected means success
                return self.connect(addr, dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted connect to %s timed out." % str(addr) )
    # end connect

    def accept(self, dumbhack=None):
        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to accept
        #  If we get a valid result, then convert the
        #  accept'ed socket into a TimeoutSocket.
        # Be careful about the blocking mode of ourselves.
        try:
            sock.setblocking(0)
            newsock, addr = sock.accept()
            sock.setblocking(blocking)
            timeoutnewsock = self.__class__(newsock, timeout)
            timeoutnewsock.setblocking(blocking)
            return (timeoutnewsock, addr)
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not supposed to block, then re-raise
            if not blocking:
                raise

            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if errcode not in _AcceptBusy:
                raise

        # Now, wait for the accept to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([sock], [], [], timeout)
            if r:
                return self.accept(dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted accept timed out.")
    # end accept

    def send(self, data, flags=0):
        sock = self._sock
        if self._blocking:
            # wait (up to the timeout) for the socket to become writable
            r,w,e = select.select([],[sock],[], self._timeout)
            if not w:
                raise Timeout("Send timed out")
        return sock.send(data, flags)
    # end send

    def recv(self, bufsize, flags=0):
        sock = self._sock
        if self._blocking:
            # wait (up to the timeout) for data to arrive
            r,w,e = select.select([sock], [], [], self._timeout)
            if not r:
                raise Timeout("Recv timed out")
        return sock.recv(bufsize, flags)
    # end recv

    def makefile(self, flags="r", bufsize=-1):
        # each file-like copy bumps the refcount consulted by close()
        self._copies = self._copies +1
        return TimeoutFile(self, flags, bufsize)
    # end makefile

    def close(self):
        if self._copies <= 0:
            self._sock.close()
        else:
            self._copies = self._copies -1
    # end close

# end TimeoutSocket
class TimeoutFile:
    """TimeoutFile object
    Implements a file-like object on top of TimeoutSocket.
    """

    def __init__(self, sock, mode="r", bufsize=4096):
        self._sock = sock
        self._bufsize = 4096
        if bufsize > 0: self._bufsize = bufsize
        # _inqueue holds data received but not yet consumed; it lives on
        # the socket so all file copies of the socket share one buffer
        if not hasattr(sock, "_inqueue"): self._sock._inqueue = ""
    # end __init__

    def __getattr__(self, key):
        # delegate everything else (send, recv, ...) to the TimeoutSocket
        return getattr(self._sock, key)
    # end __getattr__

    def close(self):
        self._sock.close()
        self._sock = None
    # end close

    def write(self, data):
        self.send(data)
    # end write

    def read(self, size=-1):
        """Read up to 'size' bytes (all available until EOF if size < 0)."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            datalen = len(_sock._inqueue)
            # stop once we have buffered at least 'size' bytes
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                # connection closed
                break
            _sock._inqueue = _sock._inqueue + buf
        data = _sock._inqueue
        _sock._inqueue = ""
        # push any excess back into the queue
        if size > 0 and datalen > size:
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end read

    def readline(self, size=-1):
        """Read one line (up to 'size' bytes when size >= 0)."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            # stop as soon as the buffered data contains a newline
            idx = string.find(_sock._inqueue, "\n")
            if idx >= 0:
                break
            datalen = len(_sock._inqueue)
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                break
            _sock._inqueue = _sock._inqueue + buf

        data = _sock._inqueue
        _sock._inqueue = ""
        if idx >= 0:
            # return through the newline, requeue the remainder
            idx = idx + 1
            _sock._inqueue = data[idx:]
            data = data[:idx]
        elif size > 0 and datalen > size:
            # no newline: honor the size limit, requeue the excess
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end readline

    def readlines(self, sizehint=-1):
        # NOTE: sizehint is accepted for file-object compatibility but
        # ignored; everything is read to EOF then split on newlines
        result = []
        data = self.read()
        while data:
            idx = string.find(data, "\n")
            if idx >= 0:
                idx = idx + 1
                result.append( data[:idx] )
                data = data[idx:]
            else:
                result.append( data )
                data = ""
        return result
    # end readlines

    def flush(self):  pass

# end TimeoutFile
#
# Silently replace the socket() builtin function with
# our timeoutsocket() definition.
#
if not hasattr(socket, "_no_timeoutsocket"):
    # stash the original so the shim is installed at most once and
    # can be bypassed via socket._no_timeoutsocket
    socket._no_timeoutsocket = socket.socket
    socket.socket = timeoutsocket
# rebind this module's 'socket' name to the factory as well
del socket
socket = timeoutsocket
# Finis

11
runtests.py Executable file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
import glob, trace, unittest

# find all of the planet test modules
modules = [trace.fullmodname(path) for path in glob.glob('tests/test_*.py')]

# load all of the tests into a suite
suite = unittest.TestLoader().loadTestsFromNames(modules)

# run test suite
unittest.TextTestRunner().run(suite)

20
spider.py Normal file
View File

@ -0,0 +1,20 @@
"""
Main program to run just the spider portion of planet
"""
import sys
from planet import spider, config
if __name__ == '__main__':
if len(sys.argv) == 2:
# spider all feeds
spider.spiderPlanet(sys.argv[1])
elif len(sys.argv) > 2 and os.path.isdir(sys.argv[1]):
# spider selected feeds
config.load(sys.argv[1])
for feed in sys.argv[2:]:
spider.spiderFeed(feed)
else:
print "Usage:"
print " python %s config.ini [URI URI ...]" % sys.argv[0]

21
splice.py Normal file
View File

@ -0,0 +1,21 @@
"""
Main program to run just the splice portion of planet
"""
import os.path
import sys
from planet import splice
if __name__ == '__main__':
if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
# at the moment, we don't have template support, so we cheat and
# simply insert a XSLT processing instruction
doc = splice.splice(sys.argv[1])
pi = doc.createProcessingInstruction(
'xml-stylesheet','type="text/xsl" href="planet.xslt"')
doc.insertBefore(pi, doc.firstChild)
print doc.toxml('utf-8')
else:
print "Usage:"
print " python %s config.ini" % sys.argv[0]

0
tests/__init__.py Normal file
View File

View File

@ -0,0 +1,13 @@
[Planet]
name = Test Configuration
template_files = index.html.tmpl atom.xml.tmpl
items_per_page = 50
[index.html.tmpl]
days_per_page = 7
[feed1]
name = one
[feed2]
name = two

View File

@ -0,0 +1,13 @@
<!--
Description: author email
Expect: author_detail.email == 'john@example.com'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<author>
<email>john@example.com</email>
</author>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: author name
Expect: author_detail.name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<author>
<name>John Doe</name>
</author>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: author uri
Expect: author_detail.href == 'http://example.com/~john/'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<author>
<uri>http://example.com/~john/</uri>
</author>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: entity encoded html content
Expect: content[0].value == u'D\xe9tente' and content[0].type=='text/plain'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="html">D&amp;eacute;tente</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: illegal control character
Expect: content[0].value == u'Page 1<acronym title="U+000c">\ufffd</acronym>Page 2'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="html">Page 1&#12;Page 2</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: content language
Expect: content[0].language == 'en-us'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content xml:lang="en-us">foo</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: improperly nested tags
Expect: content[0].value == 'This is <b><i>very</i></b> confused'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="html">This is &lt;B&gt;&lt;i;&gt;very&lt;/b&gt;&lt;/I&gt; confused</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: plain text content
Expect: content[0].value == 'AT&T'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="text">AT&amp;T</content>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: xhtml content
Expect: content[0].value == 'A <b>very</b> bad day'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
</content>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: contributor email
Expect: contributors[0].email == 'john@example.com'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<contributor>
<email>john@example.com</email>
</contributor>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: contributor name
Expect: contributors[0].name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<contributor>
<name>John Doe</name>
</contributor>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: contributor uri
Expect: contributors[0].href == 'http://example.com/~john/'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<contributor>
<uri>http://example.com/~john/</uri>
</contributor>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: id
Expect: id == 'http://example.com/1'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<id>http://example.com/1</id>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from content
Expect: id == 'http://example.com//9a0364b9e99bb480dd25e1f0284c8555'
-->
<rss xml:base="http://example.com/">
<channel>
<item xmlns:content="http://purl.org/rss/1.0/modules/content/">
<content:encoded>content</content>
</item>
</channel>
</rss>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from description
Expect: id == 'http://example.com//67daf92c833c41c95db874e18fcb2786'
-->
<rss xml:base="http://example.com/">
<channel>
<item>
<description>description</description>
</item>
</channel>
</rss>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from link
Expect: id == 'http://example.com/1'
-->
<rss>
<channel>
<item>
<link>http://example.com/1</link>
</item>
</channel>
</rss>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from title
Expect: id == 'http://example.com//d5d3db1765287eef77d7927cc956f50a'
-->
<rss xml:base="http://example.com/">
<channel>
<item>
<title>title</title>
</item>
</channel>
</rss>

View File

@ -0,0 +1,11 @@
<!--
Description: link href
Expect: links[0].href == 'http://example.com/1'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<link href="http://example.com/1"/>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: link relationship
Expect: links[0].rel == 'alternate'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<link href="http://example.com/1"/>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: link type
Expect: links[0].type == 'text/html'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<link href="http://example.com/1"/>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: published, rollover past midnight on feb 28 in leap year
Expect: published_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<published>2004-02-28T18:14:55-08:00</published>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: rights
Expect: rights == u'\xa9 2006'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<rights type="html">&amp;copy; 2006</rights>
</entry>
</feed>

View File

@ -0,0 +1,12 @@
<!--
Description: source author
Expect: source.author_detail.name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<author>
<name>John Doe</name>
</author>
<entry/>
</feed>

View File

@ -0,0 +1,12 @@
<!--
Description: source contributor
Expect: source.contributors[0].name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<contributor>
<name>John Doe</name>
</contributor>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source icon
Expect: source.icon == 'http://www.example.com/favicon.ico'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<icon>http://www.example.com/favicon.ico</icon>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source id
Expect: source.id == 'http://example.com/'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<id>http://example.com/</id>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source link
Expect: source.links[0].href == 'http://example.com/atom.xml'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel='self' href='http://example.com/atom.xml'/>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source logo
Expect: source.logo == 'http://www.example.com/logo.jpg'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<logo>http://www.example.com/logo.jpg</logo>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source rights
Expect: source.rights == u'\xa9 2006'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<rights type="html">&amp;copy; 2006</rights>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source subtitle
Expect: source.subtitle == 'snarky phrase'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<subtitle>snarky phrase</subtitle>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source title
Expect: source.title == 'visible name'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<title>visible name</title>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source updated, rollover past midnight on feb 28 in leap year
Expect: source.updated_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<updated>2004-02-28T18:14:55-08:00</updated>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: entity encoded html summary
Expect: summary_detail.value == u'D\xe9tente' and summary_detail.type=='text/plain'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary type="html">D&amp;eacute;tente</summary>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: summary language
Expect: summary_detail.language == 'en-us'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary xml:lang="en-us">foo</summary>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: plain text summary
Expect: summary_detail.value == 'AT&T'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary type="text">AT&amp;T</summary>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: xhtml summary
Expect: summary_detail.value == 'A <b>very</b> bad day'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
</summary>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: entity encoded html title
Expect: title_detail.value == u'D\xe9tente' and title_detail.type=='text/plain'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title type="html">D&amp;eacute;tente</title>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: title language
Expect: title_detail.language == 'en-us'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title xml:lang="en-us">foo</title>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: plain text title
Expect: title_detail.value == 'AT&T'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title type="text">AT&amp;T</title>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: xhtml title
Expect: title_detail.value == 'A <b>very</b> bad day'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
</title>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: updated, rollover past midnight on feb 28 in leap year
Expect: updated_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<updated>2004-02-28T18:14:55-08:00</updated>
</entry>
</feed>

View File

@ -0,0 +1,12 @@
[Planet]
cache_directory = tests/work/spider/cache
template_files =
[tests/data/spider/testfeed1b.atom]
name = one
[tests/data/spider/testfeed2.atom]
name = two
[tests/data/spider/testfeed3.rss]
name = three

View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom"/>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<title>Sam Ruby</title>
<subtitle>Its just data</subtitle>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<updated>2006-06-16T20:15:18-04:00</updated>
<link href="http://www.intertwingly.net/blog/"/>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
<link href="http://example.com/1"/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
<link href="http://example.com/2"/>
<title>Venus</title>
<content>the Morning Star</content>
<updated>2006-01-02T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
<link href="http://example.com/3"/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
<link href="http://example.com/4"/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
</entry>
</feed>

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom"/>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<title>Sam Ruby</title>
<subtitle>Its just data</subtitle>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<updated>2006-06-16T20:15:18-04:00</updated>
<link href="http://www.intertwingly.net/blog/"/>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
<link href="http://example.com/1"/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
<link href="http://example.com/2"/>
<title>Venus</title>
<content>the Jewel of the Sky</content>
<published>2006-01-02T00:00:00Z</published>
<updated>2006-02-02T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
<link href="http://example.com/3"/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
<link href="http://example.com/4"/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
</entry>
</feed>

View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom"/>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<title>Sam Ruby</title>
<subtitle>Its just data</subtitle>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<updated>2006-06-16T20:15:18-04:00</updated>
<link href="http://www.intertwingly.net/blog/"/>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/1</id>
<link href="http://example.com/1"/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/2</id>
<link href="http://example.com/2"/>
<title>Venus</title>
<content>the Morning Star</content>
<updated>2006-01-02T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/3</id>
<link href="http://example.com/3"/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/4</id>
<link href="http://example.com/4"/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
</entry>
</feed>

View File

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>Sam Ruby</title>
<link>http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss</link>
<description>Its just data</description>
<item>
<guid>tag:planet.intertwingly.net,2006:testfeed3/1</guid>
<link href="http://example.com/1"/>
<title>Mercury</title>
<description>Messenger of the Roman Gods</description>
<pubDate>Sun, 01 Jan 2006 00:00:00 +0000</pubDate>
</item>
<item>
<guid>tag:planet.intertwingly.net,2006:testfeed3/2</guid>
<link>http://example.com/2</link>
<title>Venus</title>
<description>the Morning Star</description>
</item>
<item>
<link>http://example.com/3</link>
<title>Earth</title>
<description>the Blue Planet</description>
<pubDate>Tue, 03 Jan 2006 00:00:00 +0000</pubDate>
</item>
<entry>
<link href="http://example.com/4"/>
<title>Mars</title>
<description>the Red Planet</description>
</entry>
</feed>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,23 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<content>the Jewel of the Sky</content>
<updated>2006-02-02T00:00:00Z</updated>
<published>2006-01-02T00:00:00Z</published>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<content>the Morning Star</content>
<updated>2006-01-02T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,11 @@
[Planet]
name = test planet
cache_directory = tests/data/splice/cache
template_files =
[tests/data/spider/testfeed1b.atom]
name = one
[tests/data/spider/testfeed2.atom]
name = two

52
tests/test_config.py Normal file
View File

@ -0,0 +1,52 @@
#!/usr/bin/env python
"""Unit tests for planet.config.

Exercises the option accessors (template, feed, planet-wide, per-template
and per-feed) against the fixture file tests/data/config/basic.ini.
"""
import unittest

from planet import config

workdir = 'tests/work/spider/cache'


class ConfigTest(unittest.TestCase):
    def setUp(self):
        # every test runs against a freshly loaded configuration
        config.load('tests/data/config/basic.ini')

    # administrivia

    def test_template(self):
        self.assertEqual(['index.html.tmpl', 'atom.xml.tmpl'],
            config.template_files())

    def test_feeds(self):
        self.assertEqual(['feed1', 'feed2'], config.feeds())

    # planet wide configuration

    def test_name(self):
        self.assertEqual('Test Configuration', config.name())

    def test_link(self):
        self.assertEqual('Unconfigured Planet', config.link())

    # per template configuration

    def test_days_per_page(self):
        self.assertEqual(7, config.days_per_page('index.html.tmpl'))
        self.assertEqual(0, config.days_per_page('atom.xml.tmpl'))

    def test_items_per_page(self):
        self.assertEqual(50, config.items_per_page('index.html.tmpl'))
        self.assertEqual(50, config.items_per_page('atom.xml.tmpl'))

    def test_encoding(self):
        self.assertEqual('utf-8', config.encoding('index.html.tmpl'))
        self.assertEqual('utf-8', config.encoding('atom.xml.tmpl'))

    # dictionaries

    def test_feed_options(self):
        self.assertEqual('one', config.feed_options('feed1')['name'])
        self.assertEqual('two', config.feed_options('feed2')['name'])

    def test_template_options(self):
        option = config.template_options('index.html.tmpl')
        self.assertEqual('7', option['days_per_page'])
        self.assertEqual('50', option['items_per_page'])

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
import unittest, os, sys, glob, new, re, StringIO, time
from planet import feedparser
from planet.reconstitute import reconstitute
testfiles = 'tests/data/reconstitute/%s.xml'
class ReconstituteTest(unittest.TestCase):
desc_re = re.compile("Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->")
def eval(self, name):
# read the test case
try:
testcase = open(testfiles % name)
data = testcase.read()
description, expect = self.desc_re.search(data).groups()
testcase.close()
except:
raise RuntimeError, "can't parse %s" % name
# parse and reconstitute to a string
work = StringIO.StringIO()
results = feedparser.parse(data)
reconstitute(results, results.entries[0]).writexml(work)
# verify the results
results = feedparser.parse(work.getvalue().encode('utf-8'))
self.assertFalse(results.bozo, 'xml is well formed')
self.assertTrue(eval(expect, results.entries[0]), expect)
# build a test method for each test file
for testcase in glob.glob(testfiles % '*'):
root = os.path.splitext(os.path.basename(testcase))[0]
func = lambda self, name=root: self.eval(name)
method = new.instancemethod(func, None, ReconstituteTest)
setattr(ReconstituteTest, "test_" + root, method)

65
tests/test_spider.py Normal file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
"""Unit tests for planet.spider: cache-file naming and feed spidering."""
import unittest, os, glob, calendar

from planet.spider import filename, spiderFeed, spiderPlanet
from planet import feedparser, config

workdir = 'tests/work/spider/cache'
testfeed = 'tests/data/spider/testfeed%s.atom'
testfeed = 'tests/data/spider/testfeed%s.atom'
configfile = 'tests/data/spider/config.ini'


class SpiderTest(unittest.TestCase):
    def setUp(self):
        # if a previous run left the work directory behind, clear it
        # out and start over
        try:
            os.makedirs(workdir)
        except:
            self.tearDown()
            os.makedirs(workdir)

    def tearDown(self):
        for file in glob.glob(workdir + "/*"):
            os.unlink(file)
        os.removedirs(workdir)

    def test_filename(self):
        self.assertEqual('./example.com,index.html',
            filename('.', 'http://example.com/index.html'))
        self.assertEqual('./www.xn--8ws00zhy3a.com',
            filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))

    def test_spiderFeed(self):
        config.load(configfile)
        spiderFeed(testfeed % '1b')
        files = glob.glob(workdir + "/*")

        # verify that exactly four files were produced
        self.assertEqual(4, len(files))

        # verify that the file names are as expected
        self.assertTrue(workdir +
            '/tag:planet.intertwingly.net,2006:testfeed1,1' in files)

        # verify that the file timestamps match atom:updated
        for file in files:
            data = feedparser.parse(file)
            self.assertTrue(data.entries[0].source.planet_name)
            self.assertEqual(os.stat(file).st_mtime,
                calendar.timegm(data.entries[0].updated_parsed))

    def test_spiderUpdate(self):
        # spidering the updated feed on top of the original must yield
        # the same end state as spidering the updated feed alone
        spiderFeed(testfeed % '1a')
        self.test_spiderFeed()

    def test_spiderPlanet(self):
        spiderPlanet(configfile)
        files = glob.glob(workdir + "/*")

        # verify that exactly twelve files were produced
        self.assertEqual(12, len(files))

        # verify that the file names are as expected
        self.assertTrue(workdir +
            '/tag:planet.intertwingly.net,2006:testfeed1,1' in files)
        self.assertTrue(workdir +
            '/tag:planet.intertwingly.net,2006:testfeed2,1' in files)

17
tests/test_splice.py Normal file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
import unittest
from planet.splice import splice
configfile = 'tests/data/splice/config.ini'
class SpliceTest(unittest.TestCase):
def test_splice(self):
doc = splice(configfile)
self.assertEqual(8,len(doc.getElementsByTagName('entry')))
self.assertEqual(2,len(doc.getElementsByTagName('planet:subscription')))
self.assertEqual(10,len(doc.getElementsByTagName('planet:name')))
self.assertEqual('test planet',
doc.getElementsByTagName('title')[0].firstChild.nodeValue)