Initial load

This commit is contained in:
Sam Ruby 2006-08-16 11:54:54 -04:00
commit b31973d514
79 changed files with 9907 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 469 B

BIN
examples/images/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

150
examples/planet.css Normal file
View File

@ -0,0 +1,150 @@
/* Stylesheet for the example planet page (generated via planet.xslt).
   Layout: a fluid main column plus a fixed 200px-wide .sidebar absolutely
   positioned at the top right; body reserves that width via margin-right. */

body {
    border-right: 1px solid black;
    margin-right: 200px;       /* room for the 200px-wide .sidebar below */
    padding-left: 20px;
    padding-right: 20px;
}

/* Page title: lowercased, right-aligned, grey. */
h1 {
    margin-top: 0px;
    padding-top: 20px;
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    letter-spacing: -2px;
    text-transform: lowercase;
    text-align: right;
    color: grey;
}

.admin {
    text-align: right;
}

/* Date headings (h2.date in the generated page). */
h2 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    color: #200080;
    margin-left: -20px;    /* hang into the body's left padding */
}

/* Per-entry source/title bar. */
h3 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    background-color: #a0c0ff;
    border: 1px solid #5080b0;
    padding: 4px;
}

h3 a {
    text-decoration: none;
    color: inherit;
}

h4 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: bold;
}

h4 a {
    text-decoration: none;
    color: inherit;
}

img.face {
    float: right;
    margin-top: -3em;
}

/* One aggregated feed entry. */
.entry {
    margin-bottom: 2em;
}

.entry .date {
    font-family: "Bitstream Vera Sans", sans-serif;
    color: grey;
}

.entry .date a {
    text-decoration: none;
    color: inherit;
}

/* Subscription sidebar: pinned to the top-right corner, matching the
   200px margin reserved on body above. */
.sidebar {
    position: absolute;
    top: 0px;
    right: 0px;
    width: 200px;
    margin-left: 0px;
    margin-right: 0px;
    padding-right: 0px;
    padding-top: 20px;
    padding-left: 0px;
    font-family: "Bitstream Vera Sans", sans-serif;
    font-size: 85%;
}

.sidebar h2 {
    font-size: 110%;
    font-weight: bold;
    color: black;
    padding-left: 5px;
    margin-left: 0px;
}

.sidebar ul {
    padding-left: 1em;
    margin-left: 0px;
    list-style-type: none;
}

.sidebar ul li:hover {
    color: grey;
}

.sidebar ul li a {
    text-decoration: none;
}

.sidebar ul li a:hover {
    text-decoration: underline;
}

.sidebar ul li a img {
    border: 0;
}

.sidebar p {
    border-top: 1px solid grey;
    margin-top: 30px;
    padding-top: 10px;
    padding-left: 5px;
}

/* Dashed-underline "help" affordance for status messages. */
.sidebar .message {
    cursor: help;
    border-bottom: 1px dashed red;
}

.sidebar a.message:hover {
    cursor: help;
    background-color: #ff0000;
    color: #ffffff !important;
    text-decoration: none !important;
}

a:hover {
    text-decoration: underline !important;
    color: blue !important;
}

65
examples/planet.xslt Normal file
View File

@ -0,0 +1,65 @@
<!-- planet.xslt: transforms an aggregated Atom feed into an XHTML page.
     Entries are grouped by day (the date heading is emitted only for the
     first entry of each day), and a sidebar lists the subscriptions taken
     from the planet: extension elements. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
                xmlns:atom="http://www.w3.org/2005/Atom"
                xmlns:planet="http://planet.intertwingly.net/"
                xmlns="http://www.w3.org/1999/xhtml">

  <!-- Whole-page template: XHTML shell, the entries, then the sidebar. -->
  <xsl:template match="atom:feed">
    <html xmlns="http://www.w3.org/1999/xhtml">
      <head>
        <link rel="stylesheet" href="planet.css" type="text/css" />
        <title><xsl:value-of select="atom:title"/></title>
      </head>
      <body>
        <h1><xsl:value-of select="atom:title"/></h1>
        <xsl:apply-templates select="atom:entry"/>
        <div class="sidebar">
          <img src="images/logo.png" width="136" height="136" alt=""/>
          <h2>Subscriptions</h2>
          <ul>
            <!-- One list item per subscription, sorted by name; the feed
                 icon links to the subscription's self link. -->
            <xsl:for-each select="planet:subscription">
              <xsl:sort select="planet:name"/>
              <li>
                <a href="{atom:link[@rel='self']/@href}" title="subscribe">
                  <img src="images/feed-icon-10x10.png" alt="(feed)"/>
                </a>
                <xsl:value-of select="planet:name"/>
              </li>
            </xsl:for-each>
          </ul>
        </div>
      </body>
    </html>
  </xsl:template>

  <!-- Per-entry template: date heading (first entry of the day only),
       source/title links, then the content (summary as fallback). -->
  <xsl:template match="atom:entry">
    <xsl:variable name="date" select="substring(atom:updated,1,10)"/>
    <!-- Emit the date heading only when no earlier sibling entry shares
         the same yyyy-mm-dd prefix of atom:updated. -->
    <xsl:if test="not(preceding-sibling::atom:entry
        [substring(atom:updated,1,10) = $date])">
      <h2 class="date"><xsl:value-of select="$date"/></h2>
    </xsl:if>
    <h3>
      <a href="{atom:source/atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:source/planet:name"/>
      </a>
      &#x2014;
      <a href="{atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:title"/>
      </a>
    </h3>
    <div class="content">
      <xsl:choose>
        <xsl:when test="atom:content">
          <p><xsl:copy-of select="atom:content/*"/></p>
        </xsl:when>
        <xsl:otherwise>
          <p><xsl:copy-of select="atom:summary/*"/></p>
        </xsl:otherwise>
      </xsl:choose>
    </div>
  </xsl:template>
</xsl:stylesheet>

1824
planet/BeautifulSoup.py Normal file

File diff suppressed because it is too large Load Diff

45
planet/__init__.py Normal file
View File

@ -0,0 +1,45 @@
# Module-level cache for the shared planet logger.
logger = None

def getLogger(level):
    """Get (and cache) the shared "planet.runner" logger.

    On the first call the logging system is configured and the root
    logger's threshold is set from *level* (a level name such as
    "DEBUG" or "WARNING"); subsequent calls return the cached logger
    unchanged.
    """
    global logger
    if logger:
        return logger

    try:
        import logging
    except ImportError:
        # Pre-2.3 Pythons lack the logging package; use the bundled
        # compatibility module instead.  (The original caught all
        # exceptions here, which would also mask real errors.)
        import compat_logging as logging

    logging.basicConfig()
    logging.getLogger().setLevel(logging.getLevelName(level))
    logger = logging.getLogger("planet.runner")

    try:
        logger.warning
    except AttributeError:
        # Very old logging modules only provide warn(); alias it so
        # callers can always use warning().
        logger.warning = logger.warn

    return logger
def setTimeout(timeout):
    """Set a global socket timeout so ultra-slow servers time out
    rather than hang forever.

    *timeout* may be a number or a numeric string; falsy or
    unparseable values leave the socket defaults untouched (a warning
    is logged for unparseable ones).
    """
    if timeout:
        try:
            timeout = float(timeout)
        except (TypeError, ValueError):
            # Narrowed from a bare except: only conversion failures
            # should be treated as "invalid value".
            logger.warning("Timeout set to invalid value '%s', skipping", timeout)
            timeout = None

    if timeout:
        try:
            # Prefer the bundled timeoutsocket module when available.
            from planet import timeoutsocket
            timeoutsocket.setDefaultSocketTimeout(timeout)
            logger.debug("Socket timeout set to %d seconds", timeout)
        except ImportError:
            # Fall back to the stdlib hook (Python 2.3+).
            import socket
            if hasattr(socket, 'setdefaulttimeout'):
                logger.debug("timeoutsocket not found, using python function")
                socket.setdefaulttimeout(timeout)
                logger.debug("Socket timeout set to %d seconds", timeout)
            else:
                logger.error("Unable to set timeout to %d seconds", timeout)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,299 @@
# Copyright 2001-2002 by Vinay Sajip. All Rights Reserved.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of Vinay Sajip
# not be used in advertising or publicity pertaining to distribution
# of the software without specific, written prior permission.
# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""
Logging package for Python. Based on PEP 282 and comments thereto in
comp.lang.python, and influenced by Apache's log4j system.
Should work under Python versions >= 1.5.2, except that source line
information is not available unless 'inspect' is.
Copyright (C) 2001-2002 Vinay Sajip. All Rights Reserved.
To use, simply 'import logging' and log away!
"""
# Python 2-era imports ('thread', 'string' functions, SocketServer).
# NOTE(review): listen() below references types.TupleType, but 'types' is
# not imported here -- looks like an upstream omission; confirm before use.
import sys, logging, logging.handlers, string, thread, threading, socket, struct, os
from SocketServer import ThreadingTCPServer, StreamRequestHandler

# Default port on which listen() accepts new logging configurations.
DEFAULT_LOGGING_CONFIG_PORT = 9030

# Platform-specific "connection reset by peer" error code.
if sys.platform == "win32":
    RESET_ERROR = 10054     #WSAECONNRESET
else:
    RESET_ERROR = 104       #ECONNRESET

#
# The following code implements a socket listener for on-the-fly
# reconfiguration of logging.
#
# _listener holds the server object doing the listening
_listener = None
def fileConfig(fname, defaults=None):
    """
    Read the logging configuration from a ConfigParser-format file.
    This can be called several times from an application, allowing an end user
    the ability to select from various pre-canned configurations (if the
    developer provides a mechanism to present the choices and load the chosen
    configuration).
    In versions of ConfigParser which have the readfp method [typically
    shipped in 2.x versions of Python], you can pass in a file-like object
    rather than a filename, in which case the file-like object will be read
    using readfp.
    """
    import ConfigParser

    cp = ConfigParser.ConfigParser(defaults)
    # Accept either a filename or a file-like object (when readfp exists).
    if hasattr(cp, 'readfp') and hasattr(fname, 'readline'):
        cp.readfp(fname)
    else:
        cp.read(fname)
    #first, do the formatters...
    flist = cp.get("formatters", "keys")
    if len(flist):
        flist = string.split(flist, ",")
        formatters = {}
        for form in flist:
            sectname = "formatter_%s" % form
            opts = cp.options(sectname)
            if "format" in opts:
                # The trailing 1 means "raw": suppress %-interpolation.
                fs = cp.get(sectname, "format", 1)
            else:
                fs = None
            if "datefmt" in opts:
                dfs = cp.get(sectname, "datefmt", 1)
            else:
                dfs = None
            f = logging.Formatter(fs, dfs)
            formatters[form] = f
    #next, do the handlers...
    #critical section...
    logging._acquireLock()
    try:
        try:
            #first, lose the existing handlers...
            logging._handlers.clear()
            #now set up the new ones...
            hlist = cp.get("handlers", "keys")
            if len(hlist):
                hlist = string.split(hlist, ",")
                handlers = {}
                fixups = [] #for inter-handler references
                for hand in hlist:
                    sectname = "handler_%s" % hand
                    klass = cp.get(sectname, "class")
                    opts = cp.options(sectname)
                    if "formatter" in opts:
                        fmt = cp.get(sectname, "formatter")
                    else:
                        fmt = ""
                    # NOTE(review): the handler class and its constructor
                    # args come straight from the config file and are
                    # eval()'d -- only load trusted configuration files.
                    klass = eval(klass, vars(logging))
                    args = cp.get(sectname, "args")
                    args = eval(args, vars(logging))
                    h = apply(klass, args)
                    if "level" in opts:
                        level = cp.get(sectname, "level")
                        h.setLevel(logging._levelNames[level])
                    if len(fmt):
                        h.setFormatter(formatters[fmt])
                    #temporary hack for FileHandler and MemoryHandler.
                    if klass == logging.handlers.MemoryHandler:
                        if "target" in opts:
                            target = cp.get(sectname,"target")
                        else:
                            target = ""
                        if len(target): #the target handler may not be loaded yet, so keep for later...
                            fixups.append((h, target))
                    handlers[hand] = h
                #now all handlers are loaded, fixup inter-handler references...
                for fixup in fixups:
                    h = fixup[0]
                    t = fixup[1]
                    h.setTarget(handlers[t])
            #at last, the loggers...first the root...
            llist = cp.get("loggers", "keys")
            llist = string.split(llist, ",")
            llist.remove("root")
            sectname = "logger_root"
            root = logging.root
            log = root
            opts = cp.options(sectname)
            if "level" in opts:
                level = cp.get(sectname, "level")
                log.setLevel(logging._levelNames[level])
            for h in root.handlers[:]:
                root.removeHandler(h)
            hlist = cp.get(sectname, "handlers")
            if len(hlist):
                hlist = string.split(hlist, ",")
                for hand in hlist:
                    log.addHandler(handlers[hand])
            #and now the others...
            #we don't want to lose the existing loggers,
            #since other threads may have pointers to them.
            #existing is set to contain all existing loggers,
            #and as we go through the new configuration we
            #remove any which are configured. At the end,
            #what's left in existing is the set of loggers
            #which were in the previous configuration but
            #which are not in the new configuration.
            existing = root.manager.loggerDict.keys()
            #now set up the new ones...
            for log in llist:
                sectname = "logger_%s" % log
                qn = cp.get(sectname, "qualname")
                opts = cp.options(sectname)
                if "propagate" in opts:
                    propagate = cp.getint(sectname, "propagate")
                else:
                    propagate = 1
                logger = logging.getLogger(qn)
                if qn in existing:
                    existing.remove(qn)
                if "level" in opts:
                    level = cp.get(sectname, "level")
                    logger.setLevel(logging._levelNames[level])
                for h in logger.handlers[:]:
                    logger.removeHandler(h)
                logger.propagate = propagate
                logger.disabled = 0
                hlist = cp.get(sectname, "handlers")
                if len(hlist):
                    hlist = string.split(hlist, ",")
                    for hand in hlist:
                        logger.addHandler(handlers[hand])
            #Disable any old loggers. There's no point deleting
            #them as other threads may continue to hold references
            #and by disabling them, you stop them doing any logging.
            for log in existing:
                root.manager.loggerDict[log].disabled = 1
        except:
            # Report any configuration error but never propagate it past
            # the lock release below.
            import traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
            del ei
    finally:
        logging._releaseLock()
def listen(port=DEFAULT_LOGGING_CONFIG_PORT):
    """
    Start up a socket server on the specified port, and listen for new
    configurations.
    These will be sent as a file suitable for processing by fileConfig().
    Returns a Thread object on which you can call start() to start the server,
    and which you can join() when appropriate. To stop the server, call
    stopListening().
    """
    if not thread:
        raise NotImplementedError, "listen() needs threading to work"

    class ConfigStreamHandler(StreamRequestHandler):
        """
        Handler for a logging configuration request.
        It expects a completely new logging configuration and uses fileConfig
        to install it.
        """
        def handle(self):
            """
            Handle a request.
            Each request is expected to be a 4-byte length,
            followed by the config file. Uses fileConfig() to do the
            grunt work.
            """
            import tempfile
            try:
                conn = self.connection
                # Read the big-endian 4-byte payload length first.
                chunk = conn.recv(4)
                if len(chunk) == 4:
                    slen = struct.unpack(">L", chunk)[0]
                    chunk = self.connection.recv(slen)
                    while len(chunk) < slen:
                        chunk = chunk + conn.recv(slen - len(chunk))
                    #Apply new configuration. We'd like to be able to
                    #create a StringIO and pass that in, but unfortunately
                    #1.5.2 ConfigParser does not support reading file
                    #objects, only actual files. So we create a temporary
                    #file and remove it later.
                    file = tempfile.mktemp(".ini")
                    f = open(file, "w")
                    f.write(chunk)
                    f.close()
                    fileConfig(file)
                    os.remove(file)
            except socket.error, e:
                # NOTE(review): 'types' is never imported in this module,
                # so types.TupleType here would raise NameError -- confirm
                # against upstream before relying on this error path.
                if type(e.args) != types.TupleType:
                    raise
                else:
                    # Ignore connection resets; re-raise everything else.
                    errcode = e.args[0]
                    if errcode != RESET_ERROR:
                        raise

    class ConfigSocketReceiver(ThreadingTCPServer):
        """
        A simple TCP socket-based logging config receiver.
        """
        allow_reuse_address = 1

        def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT,
                     handler=None):
            ThreadingTCPServer.__init__(self, (host, port), handler)
            logging._acquireLock()
            self.abort = 0
            logging._releaseLock()
            self.timeout = 1

        def serve_until_stopped(self):
            # Poll the listening socket so self.abort (set by
            # stopListening) is rechecked at least every self.timeout
            # seconds.
            import select
            abort = 0
            while not abort:
                rd, wr, ex = select.select([self.socket.fileno()],
                                           [], [],
                                           self.timeout)
                if rd:
                    self.handle_request()
                logging._acquireLock()
                abort = self.abort
                logging._releaseLock()

    def serve(rcvr, hdlr, port):
        # Thread body: publish the server in _listener, then serve.
        server = rcvr(port=port, handler=hdlr)
        global _listener
        logging._acquireLock()
        _listener = server
        logging._releaseLock()
        server.serve_until_stopped()

    return threading.Thread(target=serve,
                            args=(ConfigSocketReceiver,
                                  ConfigStreamHandler, port))
def stopListening():
    """
    Stop the listening server which was created with a call to listen().
    """
    global _listener
    if _listener:
        # Flip the abort flag under the logging lock; the serving thread
        # notices it on its next poll cycle (see serve_until_stopped).
        logging._acquireLock()
        _listener.abort = 1
        _listener = None
        logging._releaseLock()

View File

@ -0,0 +1,728 @@
# Copyright 2001-2002 by Vinay Sajip. All Rights Reserved.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of Vinay Sajip
# not be used in advertising or publicity pertaining to distribution
# of the software without specific, written prior permission.
# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""
Logging package for Python. Based on PEP 282 and comments thereto in
comp.lang.python, and influenced by Apache's log4j system.
Should work under Python versions >= 1.5.2, except that source line
information is not available unless 'inspect' is.
Copyright (C) 2001-2002 Vinay Sajip. All Rights Reserved.
To use, simply 'import logging' and log away!
"""
# Python 2-era imports (cPickle, string functions, SocketServer).
import sys, logging, socket, types, os, string, cPickle, struct, time
from SocketServer import ThreadingTCPServer, StreamRequestHandler

#
# Some constants...
#
# Default ports for the network handlers defined below.
DEFAULT_TCP_LOGGING_PORT = 9020
DEFAULT_UDP_LOGGING_PORT = 9021
DEFAULT_HTTP_LOGGING_PORT = 9022
DEFAULT_SOAP_LOGGING_PORT = 9023
# Standard syslog UDP port (RFC-assigned).
SYSLOG_UDP_PORT = 514
class RotatingFileHandler(logging.FileHandler):
    # File handler that rolls the log over once it approaches maxBytes,
    # keeping up to backupCount numbered backups (".1" is the newest).
    def __init__(self, filename, mode="a", maxBytes=0, backupCount=0):
        """
        Open the specified file and use it as the stream for logging.
        By default, the file grows indefinitely. You can specify particular
        values of maxBytes and backupCount to allow the file to rollover at
        a predetermined size.
        Rollover occurs whenever the current log file is nearly maxBytes in
        length. If backupCount is >= 1, the system will successively create
        new files with the same pathname as the base file, but with extensions
        ".1", ".2" etc. appended to it. For example, with a backupCount of 5
        and a base file name of "app.log", you would get "app.log",
        "app.log.1", "app.log.2", ... through to "app.log.5". The file being
        written to is always "app.log" - when it gets filled up, it is closed
        and renamed to "app.log.1", and if files "app.log.1", "app.log.2" etc.
        exist, then they are renamed to "app.log.2", "app.log.3" etc.
        respectively.
        If maxBytes is zero, rollover never occurs.
        """
        logging.FileHandler.__init__(self, filename, mode)
        self.maxBytes = maxBytes
        self.backupCount = backupCount
        if maxBytes > 0:
            # Rollover only makes sense when appending, so force "a".
            self.mode = "a"

    def doRollover(self):
        """
        Do a rollover, as described in __init__().
        """
        self.stream.close()
        if self.backupCount > 0:
            # Shift app.log.1 -> app.log.2 etc., newest last, dropping
            # whatever was at the backupCount limit.
            for i in range(self.backupCount - 1, 0, -1):
                sfn = "%s.%d" % (self.baseFilename, i)
                dfn = "%s.%d" % (self.baseFilename, i + 1)
                if os.path.exists(sfn):
                    #print "%s -> %s" % (sfn, dfn)
                    if os.path.exists(dfn):
                        os.remove(dfn)
                    os.rename(sfn, dfn)
            dfn = self.baseFilename + ".1"
            if os.path.exists(dfn):
                os.remove(dfn)
            os.rename(self.baseFilename, dfn)
            #print "%s -> %s" % (self.baseFilename, dfn)
        self.stream = open(self.baseFilename, "w")

    def emit(self, record):
        """
        Emit a record.
        Output the record to the file, catering for rollover as described
        in doRollover().
        """
        if self.maxBytes > 0:                   # are we rolling over?
            msg = "%s\n" % self.format(record)
            self.stream.seek(0, 2)  #due to non-posix-compliant Windows feature
            if self.stream.tell() + len(msg) >= self.maxBytes:
                self.doRollover()
        logging.FileHandler.emit(self, record)
class SocketHandler(logging.Handler):
    """
    A handler class which writes logging records, in pickle format, to
    a streaming socket. The socket is kept open across logging calls.
    If the peer resets it, an attempt is made to reconnect on the next call.
    The pickle which is sent is that of the LogRecord's attribute dictionary
    (__dict__), so that the receiver does not need to have the logging module
    installed in order to process the logging event.
    To unpickle the record at the receiving end into a LogRecord, use the
    makeLogRecord function.
    """
    def __init__(self, host, port):
        """
        Initializes the handler with a specific host address and port.

        The attribute 'closeOnError' is initialized to 0.  When set to a
        true value, a socket error causes the socket to be silently
        closed and then reopened on the next logging call (see
        handleError below).
        """
        logging.Handler.__init__(self)
        self.host = host
        self.port = port
        self.sock = None          # lazily created in emit()
        self.closeOnError = 0

    def makeSocket(self):
        """
        A factory method which allows subclasses to define the precise
        type of socket they want.
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect((self.host, self.port))
        return s

    def send(self, s):
        """
        Send a pickled string to the socket.
        This function allows for partial sends which can happen when the
        network is busy.
        """
        if hasattr(self.sock, "sendall"):
            self.sock.sendall(s)
        else:
            # Manual sendall loop for sockets without sendall (pre-2.2).
            sentsofar = 0
            left = len(s)
            while left > 0:
                sent = self.sock.send(s[sentsofar:])
                sentsofar = sentsofar + sent
                left = left - sent

    def makePickle(self, record):
        """
        Pickles the record in binary format with a length prefix, and
        returns it ready for transmission across the socket.
        """
        s = cPickle.dumps(record.__dict__, 1)
        #n = len(s)
        #slen = "%c%c" % ((n >> 8) & 0xFF, n & 0xFF)
        slen = struct.pack(">L", len(s))
        return slen + s

    def handleError(self, record):
        """
        Handle an error during logging.
        An error has occurred during logging. Most likely cause -
        connection lost. Close the socket so that we can retry on the
        next event.
        """
        if self.closeOnError and self.sock:
            self.sock.close()
            self.sock = None        #try to reconnect next time
        else:
            logging.Handler.handleError(self, record)

    def emit(self, record):
        """
        Emit a record.
        Pickles the record and writes it to the socket in binary format.
        If there is an error with the socket, silently drop the packet.
        If there was a problem with the socket, re-establishes the
        socket.
        """
        try:
            s = self.makePickle(record)
            if not self.sock:
                self.sock = self.makeSocket()
            self.send(s)
        except:
            self.handleError(record)

    def close(self):
        """
        Closes the socket.
        """
        if self.sock:
            self.sock.close()
            self.sock = None
class DatagramHandler(SocketHandler):
    """
    A handler class which writes logging records, in pickle format, to
    a datagram socket. The pickle which is sent is that of the LogRecord's
    attribute dictionary (__dict__), so that the receiver does not need to
    have the logging module installed in order to process the logging event.
    To unpickle the record at the receiving end into a LogRecord, use the
    makeLogRecord function.
    """
    def __init__(self, host, port):
        """
        Initializes the handler with a specific host address and port.
        """
        SocketHandler.__init__(self, host, port)
        self.closeOnError = 0

    def makeSocket(self):
        """
        The factory method of SocketHandler is here overridden to create
        a UDP socket (SOCK_DGRAM).
        """
        # No connect(): datagrams are addressed per-send in send() below.
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        return s

    def send(self, s):
        """
        Send a pickled string to a socket.
        This function no longer allows for partial sends which can happen
        when the network is busy - UDP does not guarantee delivery and
        can deliver packets out of sequence.
        """
        self.sock.sendto(s, (self.host, self.port))
class SysLogHandler(logging.Handler):
    """
    A handler class which sends formatted logging records to a syslog
    server. Based on Sam Rushing's syslog module:
    http://www.nightmare.com/squirl/python-ext/misc/syslog.py
    Contributed by Nicolas Untz (after which minor refactoring changes
    have been made).
    """
    # from <linux/sys/syslog.h>:
    # ======================================================================
    # priorities/facilities are encoded into a single 32-bit quantity, where
    # the bottom 3 bits are the priority (0-7) and the top 28 bits are the
    # facility (0-big number). Both the priorities and the facilities map
    # roughly one-to-one to strings in the syslogd(8) source code. This
    # mapping is included in this file.
    #
    # priorities (these are ordered)
    LOG_EMERG = 0       #  system is unusable
    LOG_ALERT = 1       #  action must be taken immediately
    LOG_CRIT = 2        #  critical conditions
    LOG_ERR = 3         #  error conditions
    LOG_WARNING = 4     #  warning conditions
    LOG_NOTICE = 5      #  normal but significant condition
    LOG_INFO = 6        #  informational
    LOG_DEBUG = 7       #  debug-level messages

    #  facility codes
    LOG_KERN = 0        #  kernel messages
    LOG_USER = 1        #  random user-level messages
    LOG_MAIL = 2        #  mail system
    LOG_DAEMON = 3      #  system daemons
    LOG_AUTH = 4        #  security/authorization messages
    LOG_SYSLOG = 5      #  messages generated internally by syslogd
    LOG_LPR = 6         #  line printer subsystem
    LOG_NEWS = 7        #  network news subsystem
    LOG_UUCP = 8        #  UUCP subsystem
    LOG_CRON = 9        #  clock daemon
    LOG_AUTHPRIV = 10   #  security/authorization messages (private)

    #  other codes through 15 reserved for system use
    LOG_LOCAL0 = 16     #  reserved for local use
    LOG_LOCAL1 = 17     #  reserved for local use
    LOG_LOCAL2 = 18     #  reserved for local use
    LOG_LOCAL3 = 19     #  reserved for local use
    LOG_LOCAL4 = 20     #  reserved for local use
    LOG_LOCAL5 = 21     #  reserved for local use
    LOG_LOCAL6 = 22     #  reserved for local use
    LOG_LOCAL7 = 23     #  reserved for local use

    # Name -> code maps used by encodePriority() for string arguments.
    priority_names = {
        "alert":    LOG_ALERT,
        "crit":     LOG_CRIT,
        "critical": LOG_CRIT,
        "debug":    LOG_DEBUG,
        "emerg":    LOG_EMERG,
        "err":      LOG_ERR,
        "error":    LOG_ERR,        #  DEPRECATED
        "info":     LOG_INFO,
        "notice":   LOG_NOTICE,
        "panic":    LOG_EMERG,      #  DEPRECATED
        "warn":     LOG_WARNING,    #  DEPRECATED
        "warning":  LOG_WARNING,
        }

    facility_names = {
        "auth":     LOG_AUTH,
        "authpriv": LOG_AUTHPRIV,
        "cron":     LOG_CRON,
        "daemon":   LOG_DAEMON,
        "kern":     LOG_KERN,
        "lpr":      LOG_LPR,
        "mail":     LOG_MAIL,
        "news":     LOG_NEWS,
        "security": LOG_AUTH,       #  DEPRECATED
        "syslog":   LOG_SYSLOG,
        "user":     LOG_USER,
        "uucp":     LOG_UUCP,
        "local0":   LOG_LOCAL0,
        "local1":   LOG_LOCAL1,
        "local2":   LOG_LOCAL2,
        "local3":   LOG_LOCAL3,
        "local4":   LOG_LOCAL4,
        "local5":   LOG_LOCAL5,
        "local6":   LOG_LOCAL6,
        "local7":   LOG_LOCAL7,
        }

    def __init__(self, address=('localhost', SYSLOG_UDP_PORT), facility=LOG_USER):
        """
        Initialize a handler.
        If address is specified as a string, UNIX socket is used.
        If facility is not specified, LOG_USER is used.
        """
        logging.Handler.__init__(self)
        self.address = address
        self.facility = facility
        if type(address) == types.StringType:
            self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
            # syslog may require either DGRAM or STREAM sockets
            try:
                self.socket.connect(address)
            except socket.error:
                # Retry with a stream socket before giving up.
                self.socket.close()
                self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
                self.socket.connect(address)
            self.unixsocket = 1
        else:
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.unixsocket = 0
        self.formatter = None

    # curious: when talking to the unix-domain '/dev/log' socket, a
    # zero-terminator seems to be required.  this string is placed
    # into a class variable so that it can be overridden if
    # necessary.
    log_format_string = '<%d>%s\000'

    def encodePriority (self, facility, priority):
        """
        Encode the facility and priority. You can pass in strings or
        integers - if strings are passed, the facility_names and
        priority_names mapping dictionaries are used to convert them to
        integers.
        """
        if type(facility) == types.StringType:
            facility = self.facility_names[facility]
        if type(priority) == types.StringType:
            priority = self.priority_names[priority]
        return (facility << 3) | priority

    def close (self):
        """
        Closes the socket.
        """
        # NOTE(review): only the unix-domain socket is closed here; the
        # INET UDP socket created in __init__ is left open.
        if self.unixsocket:
            self.socket.close()

    def emit(self, record):
        """
        Emit a record.
        The record is formatted, and then sent to the syslog server. If
        exception information is present, it is NOT sent to the server.
        """
        msg = self.format(record)
        """
        We need to convert record level to lowercase, maybe this will
        change in the future.
        """
        msg = self.log_format_string % (
            self.encodePriority(self.facility,
                                string.lower(record.levelname)),
            msg)
        try:
            if self.unixsocket:
                self.socket.send(msg)
            else:
                self.socket.sendto(msg, self.address)
        except:
            self.handleError(record)
class SMTPHandler(logging.Handler):
    """
    A handler class which sends an SMTP email for each logging event.
    """
    def __init__(self, mailhost, fromaddr, toaddrs, subject):
        """
        Initialize the handler.
        Initialize the instance with the from and to addresses and subject
        line of the email. To specify a non-standard SMTP port, use the
        (host, port) tuple format for the mailhost argument.
        """
        logging.Handler.__init__(self)
        if type(mailhost) == types.TupleType:
            host, port = mailhost
            self.mailhost = host
            self.mailport = port
        else:
            self.mailhost = mailhost
            self.mailport = None    # emit() falls back to smtplib.SMTP_PORT
        self.fromaddr = fromaddr
        if type(toaddrs) == types.StringType:
            # Allow a single recipient to be given as a bare string.
            toaddrs = [toaddrs]
        self.toaddrs = toaddrs
        self.subject = subject

    def getSubject(self, record):
        """
        Determine the subject for the email.
        If you want to specify a subject line which is record-dependent,
        override this method.
        """
        return self.subject

    # Name tables used by date_time() to build an RFC-style date header.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def date_time(self):
        """Return the current date and time formatted for a MIME header."""
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(time.time())
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def emit(self, record):
        """
        Emit a record.
        Format the record and send it to the specified addressees.
        """
        try:
            import smtplib
            port = self.mailport
            if not port:
                port = smtplib.SMTP_PORT
            smtp = smtplib.SMTP(self.mailhost, port)
            msg = self.format(record)
            msg = "From: %s\r\nTo: %s\r\nSubject: %s\r\nDate: %s\r\n\r\n%s" % (
                            self.fromaddr,
                            string.join(self.toaddrs, ","),
                            self.getSubject(record),
                            self.date_time(), msg)
            smtp.sendmail(self.fromaddr, self.toaddrs, msg)
            smtp.quit()
        except:
            self.handleError(record)
class NTEventLogHandler(logging.Handler):
    """
    A handler class which sends events to the NT Event Log. Adds a
    registry entry for the specified application name. If no dllname is
    provided, win32service.pyd (which contains some basic message
    placeholders) is used. Note that use of these placeholders will make
    your event logs big, as the entire message source is held in the log.
    If you want slimmer logs, you have to pass in the name of your own DLL
    which contains the message definitions you want to use in the event log.
    """
    def __init__(self, appname, dllname=None, logtype="Application"):
        logging.Handler.__init__(self)
        try:
            import win32evtlogutil, win32evtlog
            self.appname = appname
            self._welu = win32evtlogutil
            if not dllname:
                # Default to win32service.pyd, two directories up from
                # win32evtlogutil's own location.
                dllname = os.path.split(self._welu.__file__)
                dllname = os.path.split(dllname[0])
                dllname = os.path.join(dllname[0], r'win32service.pyd')
            self.dllname = dllname
            self.logtype = logtype
            self._welu.AddSourceToRegistry(appname, dllname, logtype)
            self.deftype = win32evtlog.EVENTLOG_ERROR_TYPE
            # Map logging levels to NT event types; see getEventType().
            self.typemap = {
                logging.DEBUG   : win32evtlog.EVENTLOG_INFORMATION_TYPE,
                logging.INFO    : win32evtlog.EVENTLOG_INFORMATION_TYPE,
                logging.WARNING : win32evtlog.EVENTLOG_WARNING_TYPE,
                logging.ERROR   : win32evtlog.EVENTLOG_ERROR_TYPE,
                logging.CRITICAL: win32evtlog.EVENTLOG_ERROR_TYPE,
         }
        except ImportError:
            # _welu stays None and emit() becomes a no-op.
            print "The Python Win32 extensions for NT (service, event "\
                        "logging) appear not to be available."
            self._welu = None

    def getMessageID(self, record):
        """
        Return the message ID for the event record. If you are using your
        own messages, you could do this by having the msg passed to the
        logger being an ID rather than a formatting string. Then, in here,
        you could use a dictionary lookup to get the message ID. This
        version returns 1, which is the base message ID in win32service.pyd.
        """
        return 1

    def getEventCategory(self, record):
        """
        Return the event category for the record.
        Override this if you want to specify your own categories. This version
        returns 0.
        """
        return 0

    def getEventType(self, record):
        """
        Return the event type for the record.
        Override this if you want to specify your own types. This version does
        a mapping using the handler's typemap attribute, which is set up in
        __init__() to a dictionary which contains mappings for DEBUG, INFO,
        WARNING, ERROR and CRITICAL. If you are using your own levels you will
        either need to override this method or place a suitable dictionary in
        the handler's typemap attribute.
        """
        return self.typemap.get(record.levelno, self.deftype)

    def emit(self, record):
        """
        Emit a record.
        Determine the message ID, event category and event type. Then
        log the message in the NT event log.
        """
        if self._welu:
            try:
                id = self.getMessageID(record)
                cat = self.getEventCategory(record)
                type = self.getEventType(record)
                msg = self.format(record)
                self._welu.ReportEvent(self.appname, id, cat, type, [msg])
            except:
                self.handleError(record)

    def close(self):
        """
        Clean up this handler.
        You can remove the application name from the registry as a
        source of event log entries. However, if you do this, you will
        not be able to see the events as you intended in the Event Log
        Viewer - it needs to be able to access the registry to get the
        DLL name.
        """
        #self._welu.RemoveSourceFromRegistry(self.appname, self.logtype)
        pass
class HTTPHandler(logging.Handler):
    """
    A class which sends records to a Web server, using either GET or
    POST semantics.
    """
    def __init__(self, host, url, method="GET"):
        """
        Initialize the instance with the host, the request URL, and the method
        ("GET" or "POST")
        """
        logging.Handler.__init__(self)
        method = string.upper(method)
        if method not in ["GET", "POST"]:
            raise ValueError, "method must be GET or POST"
        self.host = host
        self.url = url
        self.method = method

    def mapLogRecord(self, record):
        """
        Default implementation of mapping the log record into a dict
        that is send as the CGI data. Overwrite in your class.
        Contributed by Franz Glasner.
        """
        return record.__dict__

    def emit(self, record):
        """
        Emit a record.
        Send the record to the Web server as an URL-encoded dictionary
        """
        try:
            import httplib, urllib
            h = httplib.HTTP(self.host)
            url = self.url
            data = urllib.urlencode(self.mapLogRecord(record))
            if self.method == "GET":
                # Append the record data to the query string, respecting
                # any query already present in the URL.
                if (string.find(url, '?') >= 0):
                    sep = '&'
                else:
                    sep = '?'
                url = url + "%c%s" % (sep, data)
            h.putrequest(self.method, url)
            if self.method == "POST":
                h.putheader("Content-length", str(len(data)))
            h.endheaders()
            if self.method == "POST":
                h.send(data)
            h.getreply()    #can't do anything with the result
        except:
            self.handleError(record)
class BufferingHandler(logging.Handler):
    """
    A handler class which buffers logging records in memory. Whenever each
    record is added to the buffer, a check is made to see if the buffer should
    be flushed. If it should, then flush() is expected to do what's needed.
    """
    def __init__(self, capacity):
        """
        Initialize the handler with the buffer size.
        """
        logging.Handler.__init__(self)
        self.capacity = capacity
        self.buffer = []

    def shouldFlush(self, record):
        """
        Should the handler flush its buffer?

        Returns true once the buffer has reached capacity. Override this
        method to implement custom flushing strategies.
        """
        return len(self.buffer) >= self.capacity

    def emit(self, record):
        """
        Emit a record.

        Append the record; when shouldFlush() says so, call flush() to
        process the buffer.
        """
        self.buffer.append(record)
        if self.shouldFlush(record):
            self.flush()

    def flush(self):
        """
        Override to implement custom flushing behaviour.

        This version just zaps the buffer to empty.
        """
        self.buffer = []
class MemoryHandler(BufferingHandler):
    """
    A handler class which buffers logging records in memory, periodically
    flushing them to a target handler. Flushing occurs whenever the buffer
    is full, or when an event of a certain severity or greater is seen.
    """
    def __init__(self, capacity, flushLevel=logging.ERROR, target=None):
        """
        Initialize the handler with the buffer size, the level at which
        flushing should occur and an optional target.

        Note that without a target being set either here or via setTarget(),
        a MemoryHandler is no use to anyone!
        """
        BufferingHandler.__init__(self, capacity)
        self.flushLevel = flushLevel
        self.target = target

    def shouldFlush(self, record):
        """
        Check for buffer full or a record at the flushLevel or higher.
        """
        # capacity is checked first, exactly as in BufferingHandler
        if len(self.buffer) >= self.capacity:
            return True
        return record.levelno >= self.flushLevel

    def setTarget(self, target):
        """
        Set the target handler for this handler.
        """
        self.target = target

    def flush(self):
        """
        For a MemoryHandler, flushing means just sending the buffered
        records to the target, if there is one. Override if you want
        different behaviour.
        """
        if self.target:
            for buffered in self.buffer:
                self.target.handle(buffered)
            self.buffer = []

    def close(self):
        """
        Flush, set the target to None and lose the buffer.
        """
        self.flush()
        self.target = None
        self.buffer = []

112
planet/config.py Normal file
View File

@ -0,0 +1,112 @@
"""
Planet Configuration
This module encapsulates all planet configuration. This is not a generic
configuration parser, it knows everything about configuring a planet - from
the structure of the ini file, to knowledge of data types, even down to
what are the defaults.
Usage:
from planet import config
config.load('config.ini')
# administrative / structural information
print config.templates()
print config.feeds()
# planet wide configuration
print config.name()
print config.link()
# per template configuration
print config.days_per_page('atom.xml.tmpl')
print config.encoding('index.html.tmpl')
Todo:
* error handling (example: no planet section)
"""
import sys
from ConfigParser import ConfigParser
parser = ConfigParser()
planet_predefined_options = []
def __init__():
    """define the structure of an ini file"""
    from planet import config

    def get(section, option, default):
        # look in the named section first, fall back to [Planet],
        # and finally to the hard-coded default
        if section and parser.has_option(section, option):
            return parser.get(section, option)
        elif parser.has_option('Planet', option):
            return parser.get('Planet', option)
        else:
            return default

    def define_planet(name, default):
        # planet-wide option: installs a zero-argument accessor on the
        # config module (e.g. config.name())
        setattr(config, name, lambda default=default: get(None,name,default))
        planet_predefined_options.append(name)

    def define_tmpl(name, default):
        # per-template option: accessor takes the template section name
        setattr(config, name, lambda section, default=default:
            get(section,name,default))

    def define_tmpl_int(name, default):
        # per-template option, coerced to int on every access
        setattr(config, name, lambda section, default=default:
            int(get(section,name,default)))

    # planet wide options
    define_planet('name', "Unconfigured Planet")
    define_planet('link', "Unconfigured Planet")
    define_planet('cache_directory', "cache")
    define_planet('log_level', "WARNING")
    define_planet('feed_timeout', 20)

    # template options
    define_tmpl_int('days_per_page', 0)
    define_tmpl_int('items_per_page', 60)
    define_tmpl('encoding', 'utf-8')

    # prevent re-initialization: subsequent calls become no-ops
    setattr(config, '__init__', lambda: None)
def load(file):
    """ initialize and load a configuration"""
    # idempotent: __init__ replaces itself with a no-op after the first call
    __init__()
    global parser
    parser = ConfigParser()
    parser.read(file)
def template_files():
    """ list the templates defined """
    # template_files is a single space-separated value in [Planet]
    value = parser.get('Planet', 'template_files')
    return value.split(' ')
def feeds():
    """ list the feeds defined """
    # every section that is neither [Planet] nor a template is a feed
    templates = template_files()
    return [section for section in parser.sections()
            if section != 'Planet' and section not in templates]
def planet_options():
    """ dictionary of planet wide options"""
    return dict([(option, parser.get('Planet', option))
                 for option in parser.options('Planet')])
def feed_options(section):
    """ dictionary of feed specific options"""
    from planet import config
    # start with the planet-wide options, minus the predefined ones
    options = dict([(key,value) for key,value in planet_options().items()
        if key not in planet_predefined_options])
    # feed-specific options override the planet-wide values
    if parser.has_section(section):
        options.update(dict(map(lambda opt: (opt, parser.get(section,opt)),
            parser.options(section))))
    return options
def template_options(section):
    """ dictionary of template specific options"""
    # template sections are resolved exactly like feed sections
    return feed_options(section)
def write(file=sys.stdout):
    """ write out an updated template """
    # ConfigParser.write() returns None; the previous code printed that
    # return value, appending a spurious "None" line to stdout.
    parser.write(file)

3656
planet/feedparser.py Executable file

File diff suppressed because it is too large Load Diff

195
planet/reconstitute.py Normal file
View File

@ -0,0 +1,195 @@
"""
Reconstitute an entry document from the output of the Universal Feed Parser.
The main entry point is called 'reconstitute'. Input parameters are:
results: this is the entire hash table return by the UFP
entry: this is the entry in the hash that you want reconstituted
The value returned is an XML DOM. Every effort is made to convert
everything to unicode, and text fields into either plain text or
well formed XHTML.
Todo:
* extension elements
"""
import re, time, md5, sgmllib
from xml.sax.saxutils import escape
from xml.dom import minidom
from BeautifulSoup import BeautifulSoup
from xml.parsers.expat import ExpatError
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")

def createTextElement(parent, name, value):
    """ utility function to create a child element with the specified text"""
    # silently skip empty/missing values: no element is created
    if not value:
        return
    document = parent.ownerDocument
    child = document.createElement(name)
    child.appendChild(document.createTextNode(value))
    parent.appendChild(child)
def invalidate(c):
    """ replace invalid characters """
    # emit the U+FFFD replacement character (utf-8 encoded), wrapped in an
    # acronym element that records the code point of the offending character
    codepoint = ord(c.group(0))
    return '<acronym title="U+%04x">\xef\xbf\xbd</acronym>' % codepoint
def ncr2c(value):
    """ convert numeric character references to characters """
    # value is a regex match whose group(1) is the reference body,
    # e.g. '8212' or 'x2014'
    value=value.group(1)
    if value.startswith('x'):
        # hexadecimal form: &#x2014;
        value=unichr(int(value[1:],16))
    else:
        # decimal form: &#8212;
        value=unichr(int(value))
    return value
def normalize(text, bozo):
    """ convert everything to well formed XML """
    # 'text' is a feedparser detail dict (value/type); 'bozo' is the
    # feed-level well-formedness flag from the parse results
    if text.has_key('type'):
        if text.type.lower().find('html')<0:
            # plain text: escape markup characters and promote to html
            text['value'] = escape(text.value)
            text['type'] = 'text/html'
        if text.type.lower() == 'text/html' or bozo:
            # tidy with BeautifulSoup: balances tags and converts
            # named entities to characters
            dom=BeautifulSoup(text.value,convertEntities="html")
            for tag in dom.findAll(True):
                for attr,value in tag.attrs:
                    # numeric character references -> characters
                    value=sgmllib.charref.sub(ncr2c,value)
                    # characters illegal in XML -> U+FFFD
                    value=illegal_xml_chars.sub(u'\uFFFD',value)
                    tag[attr]=value
            # in text content, mark illegal characters visibly
            text['value'] = illegal_xml_chars.sub(invalidate, str(dom))
    return text
def id(xentry, entry):
    """ copy or compute an id for the entry """
    # NOTE: shadows the builtin id() within this module
    if entry.has_key("id"):
        entry_id = entry.id
    elif entry.has_key("link"):
        entry_id = entry.link
    elif entry.has_key("title"):
        # no natural id: synthesize one from an md5 digest of the title,
        # qualified by the base URI the entry was found at
        entry_id = (entry.title_detail.base + "/" +
            md5.new(entry.title).hexdigest())
    elif entry.has_key("summary"):
        entry_id = (entry.summary_detail.base + "/" +
            md5.new(entry.summary).hexdigest())
    elif entry.has_key("content"):
        entry_id = (entry.content[0].base + "/" +
            md5.new(entry.content[0].value).hexdigest())
    else:
        # nothing at all to derive an id from
        return
    # xentry may be None when the caller only wants the computed value
    if xentry: createTextElement(xentry, 'id', entry_id)
    return entry_id
def links(xentry, entry):
    """ copy links to the entry """
    if not entry.has_key('links'):
        return
    document = xentry.ownerDocument
    for link in entry.links:
        element = document.createElement('link')
        element.setAttribute('type', link.type)
        element.setAttribute('href', link.href)
        element.setAttribute('rel', link.rel)
        xentry.appendChild(element)
def date(xentry, name, parsed):
    """ insert a date-formatted element into the entry """
    # 'parsed' is a time tuple (e.g. from feedparser's *_parsed fields)
    if not parsed:
        return
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsed)
    createTextElement(xentry, name, stamp)
def author(xentry, name, detail):
    """ insert an author-like element into the entry """
    # 'detail' is a feedparser author_detail-style dict
    if not detail:
        return
    document = xentry.ownerDocument
    element = document.createElement(name)
    # child element name -> key in the detail dict
    for child, key in (('name', 'name'), ('email', 'email'), ('uri', 'href')):
        createTextElement(element, child, detail.get(key, None))
    xentry.appendChild(element)
def content(xentry, name, detail, bozo):
    """ insert a content-like element into the entry """
    if not detail or not detail.value: return
    # normalize mutates 'detail' in place to well-formed html
    normalize(detail, bozo)
    xdoc = xentry.ownerDocument
    xcontent = xdoc.createElement(name)
    try:
        # see if the resulting text is a well-formed XML fragment
        div = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
        if isinstance(detail.value,unicode):
            detail.value=detail.value.encode('utf-8')
        data = minidom.parseString(div % detail.value).documentElement
        if detail.value.find('<') < 0:
            # no markup at all: emit the plain text node, untyped
            xcontent.appendChild(data.firstChild)
        else:
            # markup present: keep the wrapping div and mark as xhtml
            xcontent.setAttribute('type', 'xhtml')
            xcontent.appendChild(data)
    except ExpatError:
        # leave as html
        xcontent.setAttribute('type', 'html')
        xcontent.appendChild(xdoc.createTextNode(detail.value.decode('utf-8')))
    if detail.language:
        xcontent.setAttribute('xml:lang', detail.language)
    xentry.appendChild(xcontent)
def source(xentry, source, bozo):
    """ copy source information to the entry """
    # NOTE: the parameter shadows the function name, so this function
    # cannot recurse -- intentional, it never needs to
    xdoc = xentry.ownerDocument
    xsource = xdoc.createElement('source')
    createTextElement(xsource, 'id', source.get('id', None))
    createTextElement(xsource, 'icon', source.get('icon', None))
    createTextElement(xsource, 'logo', source.get('logo', None))
    author(xsource, 'author', source.get('author_detail',None))
    for contributor in source.get('contributors',[]):
        author(xsource, 'contributor', contributor)
    links(xsource, source)
    content(xsource, 'rights', source.get('rights_detail',None), bozo)
    content(xsource, 'subtitle', source.get('subtitle_detail',None), bozo)
    content(xsource, 'title', source.get('title_detail',None), bozo)
    date(xsource, 'updated', source.get('updated_parsed',None))
    # propagate planet inserted information
    for key, value in source.items():
        if key.startswith('planet:'):
            createTextElement(xsource, key, value)
    xentry.appendChild(xsource)
def reconstitute(feed, entry):
    """ create an entry document from a parsed feed """
    # 'feed' is the complete UFP result hash; 'entry' one of its entries.
    # Returns a minidom document rooted at an Atom <entry>.
    xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
    xentry=xdoc.documentElement
    xentry.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
    id(xentry, entry)
    links(xentry, entry)

    bozo = feed.bozo
    content(xentry, 'title', entry.get('title_detail',None), bozo)
    content(xentry, 'summary', entry.get('summary_detail',None), bozo)
    content(xentry, 'content', entry.get('content',[None])[0], bozo)
    content(xentry, 'rights', entry.get('rights_detail',None), bozo)

    # an entry with no updated date gets stamped "now"
    date(xentry, 'updated', entry.get('updated_parsed',time.gmtime()))
    date(xentry, 'published', entry.get('published_parsed',None))

    author(xentry, 'author', entry.get('author_detail',None))
    for contributor in entry.get('contributors',[]):
        author(xentry, 'contributor', contributor)

    # fall back to the feed itself as the source
    source(xentry, entry.get('source', feed.feed), bozo)
    return xdoc

86
planet/spider.py Normal file
View File

@ -0,0 +1,86 @@
"""
Fetch either a single feed, or a set of feeds, normalize to Atom and XHTML,
and write each as a set of entries in a cache directory.
"""
from planet import config, feedparser, reconstitute
import time, calendar, re, os
try:
from xml.dom.ext import PrettyPrint
except:
PrettyPrint = None
# Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^[^:]*://')
re_slash = re.compile(r'[?/]+')
re_initial_cruft = re.compile(r'^[,.]*')
re_final_cruft = re.compile(r'[,.]*$')

def filename(directory, filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    try:
        if re_url_scheme.match(filename):
            # best effort IDNA-encode the name; any failure is ignored
            if isinstance(filename, str):
                filename = filename.decode('utf-8').encode('idna')
            else:
                filename = filename.encode('idna')
    except:
        pass

    # apply the sanitising substitutions in order: drop the scheme,
    # collapse slashes/query markers to commas, trim leading and
    # trailing punctuation
    for pattern, replacement in ((re_url_scheme, ""),
                                 (re_slash, ","),
                                 (re_initial_cruft, ""),
                                 (re_final_cruft, "")):
        filename = pattern.sub(replacement, filename)
    return os.path.join(directory, filename)
def spiderFeed(feed):
    """ Spider (fetch) a single feed """
    data = feedparser.parse(feed)
    cache = config.cache_directory()

    # capture data from the planet configuration file
    for name, value in config.feed_options(feed).items():
        data.feed['planet:'+name] = value

    for entry in data.entries:
        # ensure the entry has an id; skip entries for which not even
        # a synthetic one can be computed
        if not entry.has_key('id'):
            entry['id'] = reconstitute.id(None, entry)
        if not entry['id']: continue
        out = filename(cache, entry.id)

        # cache file mtime: prefer the entry's own timestamp, fall back
        # to the existing file's mtime, or "now" for brand new entries
        if entry.has_key('updated_parsed'):
            mtime = calendar.timegm(entry.updated_parsed)
        else:
            try:
                mtime = os.stat(out).st_mtime
            except:
                mtime = time.time()
            entry['updated_parsed'] = time.gmtime(mtime)

        # reconstitute exactly once and reuse the result (the previous
        # code discarded this value and reconstituted again in each
        # output branch, tripling the work per entry)
        xml = reconstitute.reconstitute(data, entry)
        output = open(out, 'w')
        try:
            if PrettyPrint:
                PrettyPrint(xml, output)
            else:
                output.write(xml.toxml('utf-8'))
        finally:
            # close even if serialization fails, so the handle is not leaked
            output.close()
        os.utime(out, (mtime, mtime))
def spiderPlanet(configFile):
    """ Spider (fetch) an entire planet """
    import planet
    config.load(configFile)
    log = planet.getLogger(config.log_level())
    # apply the configured timeout to all subsequent feed fetches
    planet.setTimeout(config.feed_timeout())
    for feed in config.feeds():
        log.info("Updating feed %s", feed)
        spiderFeed(feed)

46
planet/splice.py Normal file
View File

@ -0,0 +1,46 @@
""" Splice together a planet from a cache of feed entries """
import glob, os
from planet import config
from xml.dom import minidom
from reconstitute import createTextElement
def splice(configFile):
    """ Splice together a planet from a cache of entries """
    import planet
    config.load(configFile)
    # configures logging as a side effect; 'log' itself is unused here
    log = planet.getLogger(config.log_level())

    cache = config.cache_directory()
    # sort cached entry files newest-first by mtime
    # (NOTE: 'dir' and 'file' shadow builtins)
    dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")]
    dir.sort()
    dir.reverse()

    # take enough items to satisfy the largest template
    items=max([config.items_per_page(templ)
        for templ in config.template_files()])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())

    # insert entry information
    for mtime,file in dir[:items]:
        entry=minidom.parse(file)
        feed.appendChild(entry.documentElement)

    # insert subscription information
    feed.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
    for sub in config.feeds():
        name = config.feed_options(sub).get('name','')
        xsub = doc.createElement('planet:subscription')
        xlink = doc.createElement('link')
        xlink.setAttribute('rel','self')
        xlink.setAttribute('href',sub.decode('utf-8'))
        xsub.appendChild(xlink)
        xname = doc.createElement('planet:name')
        xname.appendChild(doc.createTextNode(name.decode('utf-8')))
        xsub.appendChild(xname)
        feed.appendChild(xsub)

    return doc

424
planet/timeoutsocket.py Normal file
View File

@ -0,0 +1,424 @@
####
# Copyright 2000,2001 by Timothy O'Malley <timo@alum.mit.edu>
#
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software
# and its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Timothy O'Malley not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
####
"""Timeout Socket
This module enables a timeout mechanism on all TCP connections. It
does this by inserting a shim into the socket module. After this module
has been imported, all socket creation goes through this shim. As a
result, every TCP connection will support a timeout.
The beauty of this method is that it immediately and transparently
enables the entire python library to support timeouts on TCP sockets.
As an example, if you wanted to SMTP connections to have a 20 second
timeout:
import timeoutsocket
import smtplib
timeoutsocket.setDefaultSocketTimeout(20)
The timeout applies to the socket functions that normally block on
execution: read, write, connect, and accept. If any of these
operations exceeds the specified timeout, the exception Timeout
will be raised.
The default timeout value is set to None. As a result, importing
this module does not change the default behavior of a socket. The
timeout mechanism only activates when the timeout has been set to
a numeric value. (This behavior mimics the behavior of the
select.select() function.)
This module implements two classes: TimeoutSocket and TimeoutFile.
The TimeoutSocket class defines a socket-like object that attempts to
avoid the condition where a socket may block indefinitely. The
TimeoutSocket class raises a Timeout exception whenever the
current operation delays too long.
The TimeoutFile class defines a file-like object that uses the TimeoutSocket
class. When the makefile() method of TimeoutSocket is called, it returns
an instance of a TimeoutFile.
Each of these objects adds two methods to manage the timeout value:
get_timeout() --> returns the timeout of the socket or file
set_timeout() --> sets the timeout of the socket or file
As an example, one might use the timeout feature to create httplib
connections that will timeout after 30 seconds:
import timeoutsocket
import httplib
H = httplib.HTTP("www.python.org")
H.sock.set_timeout(30)
Note: When used in this manner, the connect() routine may still
block because it happens before the timeout is set. To avoid
this, use the 'timeoutsocket.setDefaultSocketTimeout()' function.
Good Luck!
"""
__version__ = "$Revision: 1.1.1.1 $"
__author__ = "Timothy O'Malley <timo@alum.mit.edu>"
#
# Imports
#
import select, string
import socket
# Capture the real socket implementation before we shim it below.  If the
# shim was installed by an earlier import, the original lives in
# socket._no_timeoutsocket instead of socket.socket.
if not hasattr(socket, "_no_timeoutsocket"):
    _socket = socket.socket
else:
    _socket = socket._no_timeoutsocket
#
# Set up constants to test for Connected and Blocking operations.
# We delete 'os' and 'errno' to keep our namespace clean(er).
# Thanks to Alex Martelli and G. Li for the Windows error codes.
#
import os
if os.name == "nt":
    # Winsock error numbers (WSAEINVAL/WSAENOTSOCK, WSAEWOULDBLOCK, ...)
    _IsConnected = ( 10022, 10056 )
    _ConnectBusy = ( 10035, )
    _AcceptBusy = ( 10035, )
else:
    # POSIX errno values
    import errno
    _IsConnected = ( errno.EISCONN, )
    _ConnectBusy = ( errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK )
    _AcceptBusy = ( errno.EAGAIN, errno.EWOULDBLOCK )
    del errno
del os
#
# Default timeout value for ALL TimeoutSockets
#
_DefaultTimeout = None

def setDefaultSocketTimeout(timeout):
    """Set the timeout applied to every subsequently created TimeoutSocket."""
    global _DefaultTimeout
    _DefaultTimeout = timeout

def getDefaultSocketTimeout():
    """Return the timeout currently applied to new TimeoutSockets."""
    return _DefaultTimeout
#
# Exceptions for socket errors and timeouts
#
Error = socket.error

class Timeout(Exception):
    """Raised when a socket operation exceeds the configured timeout."""
#
# Factory function
#
from socket import AF_INET, SOCK_STREAM
def timeoutsocket(family=AF_INET, type=SOCK_STREAM, proto=None):
    """Create a socket; TCP (AF_INET/SOCK_STREAM) sockets are wrapped in
    a TimeoutSocket carrying the current default timeout."""
    if family == AF_INET and type == SOCK_STREAM:
        return TimeoutSocket( _socket(family, type), _DefaultTimeout )
    # anything other than a TCP socket is returned unwrapped
    if proto:
        return _socket(family, type, proto)
    return _socket(family, type)
# end timeoutsocket
#
# The TimeoutSocket class definition
#
class TimeoutSocket:
    """TimeoutSocket object
    Implements a socket-like object that raises Timeout whenever
    an operation takes too long.
    The definition of 'too long' can be changed using the
    set_timeout() method.
    """

    # number of outstanding makefile() copies; close() only really closes
    # the underlying socket once this reaches zero
    _copies = 0
    # mirrors the blocking mode of the wrapped socket
    _blocking = 1

    def __init__(self, sock, timeout):
        self._sock = sock
        self._timeout = timeout
    # end __init__

    def __getattr__(self, key):
        # delegate anything we don't override to the wrapped socket
        return getattr(self._sock, key)
    # end __getattr__

    def get_timeout(self):
        return self._timeout
    # end set_timeout

    def set_timeout(self, timeout=None):
        self._timeout = timeout
    # end set_timeout

    def setblocking(self, blocking):
        self._blocking = blocking
        return self._sock.setblocking(blocking)
    # end set_timeout

    def connect_ex(self, addr):
        # like socket.connect_ex: return an error number instead of raising
        errcode = 0
        try:
            self.connect(addr)
        except Error, why:
            errcode = why[0]
        return errcode
    # end connect_ex

    def connect(self, addr, port=None, dumbhack=None):
        # In case we were called as connect(host, port)
        if port != None: addr = (addr, port)

        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to connect
        try:
            sock.setblocking(0)
            sock.connect(addr)
            sock.setblocking(blocking)
            return
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not blocking, re-raise
            if not blocking:
                raise

            # If we are already connected, then return success.
            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if dumbhack and errcode in _IsConnected:
                return
            elif errcode not in _ConnectBusy:
                raise

        # Now, wait for the connect to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([], [sock], [], timeout)
            if w:
                # retry: on this second pass _IsConnected means success
                return self.connect(addr, dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted connect to %s timed out." % str(addr) )
    # end connect

    def accept(self, dumbhack=None):
        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to accept
        #  If we get a valid result, then convert the
        #  accept'ed socket into a TimeoutSocket.
        # Be careful about the blocking mode of ourselves.
        try:
            sock.setblocking(0)
            newsock, addr = sock.accept()
            sock.setblocking(blocking)
            timeoutnewsock = self.__class__(newsock, timeout)
            timeoutnewsock.setblocking(blocking)
            return (timeoutnewsock, addr)
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not supposed to block, then re-raise
            if not blocking:
                raise

            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if errcode not in _AcceptBusy:
                raise

        # Now, wait for the accept to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([sock], [], [], timeout)
            if r:
                return self.accept(dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted accept timed out.")
    # end accept

    def send(self, data, flags=0):
        sock = self._sock
        if self._blocking:
            # wait (up to the timeout) for the socket to become writable
            r,w,e = select.select([],[sock],[], self._timeout)
            if not w:
                raise Timeout("Send timed out")
        return sock.send(data, flags)
    # end send

    def recv(self, bufsize, flags=0):
        sock = self._sock
        if self._blocking:
            # wait (up to the timeout) for data to arrive
            r,w,e = select.select([sock], [], [], self._timeout)
            if not r:
                raise Timeout("Recv timed out")
        return sock.recv(bufsize, flags)
    # end recv

    def makefile(self, flags="r", bufsize=-1):
        # each file-like copy bumps the refcount consulted by close()
        self._copies = self._copies +1
        return TimeoutFile(self, flags, bufsize)
    # end makefile

    def close(self):
        if self._copies <= 0:
            self._sock.close()
        else:
            self._copies = self._copies -1
    # end close

# end TimeoutSocket
class TimeoutFile:
    """TimeoutFile object
    Implements a file-like object on top of TimeoutSocket.
    """

    def __init__(self, sock, mode="r", bufsize=4096):
        self._sock = sock
        self._bufsize = 4096
        if bufsize > 0: self._bufsize = bufsize
        # _inqueue holds data received but not yet consumed; it lives on
        # the socket so all file copies of the socket share one buffer
        if not hasattr(sock, "_inqueue"): self._sock._inqueue = ""
    # end __init__

    def __getattr__(self, key):
        # delegate everything else (send, recv, ...) to the TimeoutSocket
        return getattr(self._sock, key)
    # end __getattr__

    def close(self):
        self._sock.close()
        self._sock = None
    # end close

    def write(self, data):
        self.send(data)
    # end write

    def read(self, size=-1):
        """Read up to 'size' bytes (all available until EOF if size < 0)."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            datalen = len(_sock._inqueue)
            # stop once we have buffered at least 'size' bytes
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                # connection closed
                break
            _sock._inqueue = _sock._inqueue + buf
        data = _sock._inqueue
        _sock._inqueue = ""
        # push any excess back into the queue
        if size > 0 and datalen > size:
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end read

    def readline(self, size=-1):
        """Read one line (up to 'size' bytes when size >= 0)."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            # stop as soon as the buffered data contains a newline
            idx = string.find(_sock._inqueue, "\n")
            if idx >= 0:
                break
            datalen = len(_sock._inqueue)
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                break
            _sock._inqueue = _sock._inqueue + buf

        data = _sock._inqueue
        _sock._inqueue = ""
        if idx >= 0:
            # return through the newline, requeue the remainder
            idx = idx + 1
            _sock._inqueue = data[idx:]
            data = data[:idx]
        elif size > 0 and datalen > size:
            # no newline: honor the size limit, requeue the excess
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end readline

    def readlines(self, sizehint=-1):
        # NOTE: sizehint is accepted for file-object compatibility but
        # ignored; everything is read to EOF then split on newlines
        result = []
        data = self.read()
        while data:
            idx = string.find(data, "\n")
            if idx >= 0:
                idx = idx + 1
                result.append( data[:idx] )
                data = data[idx:]
            else:
                result.append( data )
                data = ""
        return result
    # end readlines

    def flush(self):  pass

# end TimeoutFile
#
# Silently replace the socket() builtin function with
# our timeoutsocket() definition.
#
if not hasattr(socket, "_no_timeoutsocket"):
    # stash the original so the shim is installed at most once and
    # can be bypassed via socket._no_timeoutsocket
    socket._no_timeoutsocket = socket.socket
    socket.socket = timeoutsocket
# rebind this module's 'socket' name to the factory as well
del socket
socket = timeoutsocket
# Finis

11
runtests.py Executable file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
import glob, trace, unittest

# find all of the planet test modules
modules = [trace.fullmodname(path) for path in glob.glob('tests/test_*.py')]

# load all of the tests into a suite
suite = unittest.TestLoader().loadTestsFromNames(modules)

# run test suite
unittest.TextTestRunner().run(suite)

20
spider.py Normal file
View File

@ -0,0 +1,20 @@
"""
Main program to run just the spider portion of planet
"""
import sys
from planet import spider, config
if __name__ == '__main__':
if len(sys.argv) == 2:
# spider all feeds
spider.spiderPlanet(sys.argv[1])
elif len(sys.argv) > 2 and os.path.isdir(sys.argv[1]):
# spider selected feeds
config.load(sys.argv[1])
for feed in sys.argv[2:]:
spider.spiderFeed(feed)
else:
print "Usage:"
print " python %s config.ini [URI URI ...]" % sys.argv[0]

21
splice.py Normal file
View File

@ -0,0 +1,21 @@
"""
Main program to run just the splice portion of planet
"""
import os.path
import sys
from planet import splice
if __name__ == '__main__':
if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
# at the moment, we don't have template support, so we cheat and
# simply insert a XSLT processing instruction
doc = splice.splice(sys.argv[1])
pi = doc.createProcessingInstruction(
'xml-stylesheet','type="text/xsl" href="planet.xslt"')
doc.insertBefore(pi, doc.firstChild)
print doc.toxml('utf-8')
else:
print "Usage:"
print " python %s config.ini" % sys.argv[0]

0
tests/__init__.py Normal file
View File

View File

@ -0,0 +1,13 @@
[Planet]
name = Test Configuration
template_files = index.html.tmpl atom.xml.tmpl
items_per_page = 50
[index.html.tmpl]
days_per_page = 7
[feed1]
name = one
[feed2]
name = two

View File

@ -0,0 +1,13 @@
<!--
Description: author email
Expect: author_detail.email == 'john@example.com'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<author>
<email>john@example.com</email>
</author>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: author name
Expect: author_detail.name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<author>
<name>John Doe</name>
</author>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: author uri
Expect: author_detail.href == 'http://example.com/~john/'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<author>
<uri>http://example.com/~john/</uri>
</author>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: entity encoded html content
Expect: content[0].value == u'D\xe9tente' and content[0].type=='text/plain'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="html">D&amp;eacute;tente</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: illegal control character
Expect: content[0].value == u'Page 1<acronym title="U+000c">\ufffd</acronym>Page 2'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="html">Page 1&#12;Page 2</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: content language
Expect: content[0].language == 'en-us'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content xml:lang="en-us">foo</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: improperly nested tags
Expect: content[0].value == 'This is <b><i>very</i></b> confused'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="html">This is &lt;B&gt;&lt;i;&gt;very&lt;/b&gt;&lt;/I&gt; confused</content>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: plain text content
Expect: content[0].value == 'AT&T'
-->
<feed xmns="http://www.w3.org/2005/Atom">
<entry>
<content type="text">AT&amp;T</content>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: xhtml content
Expect: content[0].value == 'A <b>very</b> bad day'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
</content>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: contributor email
Expect: contributors[0].email == 'john@example.com'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<contributor>
<email>john@example.com</email>
</contributor>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: contributor name
Expect: contributors[0].name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<contributor>
<name>John Doe</name>
</contributor>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: contributor uri
Expect: contributors[0].href == 'http://example.com/~john/'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<contributor>
<uri>http://example.com/~john/</uri>
</contributor>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: id
Expect: id == 'http://example.com/1'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<id>http://example.com/1</id>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from content
Expect: id == 'http://example.com//9a0364b9e99bb480dd25e1f0284c8555'
-->
<rss xml:base="http://example.com/">
<channel>
<item xmlns:content="http://purl.org/rss/1.0/modules/content/">
<content:encoded>content</content>
</item>
</channel>
</rss>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from description
Expect: id == 'http://example.com//67daf92c833c41c95db874e18fcb2786'
-->
<rss xml:base="http://example.com/">
<channel>
<item>
<description>description</description>
</item>
</channel>
</rss>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from link
Expect: id == 'http://example.com/1'
-->
<rss>
<channel>
<item>
<link>http://example.com/1</link>
</item>
</channel>
</rss>

View File

@ -0,0 +1,13 @@
<!--
Description: id generated from title
Expect: id == 'http://example.com//d5d3db1765287eef77d7927cc956f50a'
-->
<rss xml:base="http://example.com/">
<channel>
<item>
<title>title</title>
</item>
</channel>
</rss>

View File

@ -0,0 +1,11 @@
<!--
Description: link href
Expect: links[0].href == 'http://example.com/1'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<link href="http://example.com/1"/>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: link relationship
Expect: links[0].rel == 'alternate'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<link href="http://example.com/1"/>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: link type
Expect: links[0].type == 'text/html'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<link href="http://example.com/1"/>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: published, rollover past midnight on feb 28 in leap year
Expect: published_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<published>2004-02-28T18:14:55-08:00</published>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: rights
Expect: rights == u'\xa9 2006'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<rights type="html">&amp;copy; 2006</rights>
</entry>
</feed>

View File

@ -0,0 +1,12 @@
<!--
Description: source author
Expect: source.author_detail.name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<author>
<name>John Doe</name>
</author>
<entry/>
</feed>

View File

@ -0,0 +1,12 @@
<!--
Description: source contributor
Expect: source.contributors[0].name == 'John Doe'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<contributor>
<name>John Doe</name>
</contributor>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source icon
Expect: source.icon == 'http://www.example.com/favicon.ico'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<icon>http://www.example.com/favicon.ico</icon>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source id
Expect: source.id == 'http://example.com/'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<id>http://example.com/</id>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source link
Expect: source.links[0].href == 'http://example.com/atom.xml'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel='self' href='http://example.com/atom.xml'/>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source logo
Expect: source.logo == 'http://www.example.com/logo.jpg'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<logo>http://www.example.com/logo.jpg</logo>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source rights
Expect: source.rights == u'\xa9 2006'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<rights type="html">&amp;copy; 2006</rights>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source subtitle
Expect: source.subtitle == 'snarky phrase'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<subtitle>snarky phrase</subtitle>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source title
Expect: source.title == 'visible name'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<title>visible name</title>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: source updated, rollover past midnight on feb 28 in leap year
Expect: source.updated_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<updated>2004-02-28T18:14:55-08:00</updated>
<entry/>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: entity encoded html summary
Expect: summary_detail.value == u'D\xe9tente' and summary_detail.type=='text/plain'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary type="html">D&amp;eacute;tente</summary>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: summary language
Expect: summary_detail.language == 'en-us'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary xml:lang="en-us">foo</summary>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: plain text summary
Expect: summary_detail.value == 'AT&T'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary type="text">AT&amp;T</summary>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: xhtml summary
Expect: summary_detail.value == 'A <b>very</b> bad day'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<summary type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
</summary>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: entity encoded html title
Expect: title_detail.value == u'D\xe9tente' and title_detail.type=='text/plain'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title type="html">D&amp;eacute;tente</title>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: title language
Expect: title_detail.language == 'en-us'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title xml:lang="en-us">foo</title>
</entry>
</feed>

View File

@ -0,0 +1,10 @@
<!--
Description: plain text title
Expect: title_detail.value == 'AT&T'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title type="text">AT&amp;T</title>
</entry>
</feed>

View File

@ -0,0 +1,13 @@
<!--
Description: xhtml title
Expect: title_detail.value == 'A <b>very</b> bad day'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
</title>
</entry>
</feed>

View File

@ -0,0 +1,11 @@
<!--
Description: updated, rollover past midnight on feb 28 in leap year
Expect: updated_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<updated>2004-02-28T18:14:55-08:00</updated>
</entry>
</feed>

View File

@ -0,0 +1,12 @@
[Planet]
cache_directory = tests/work/spider/cache
template_files =
[tests/data/spider/testfeed1b.atom]
name = one
[tests/data/spider/testfeed2.atom]
name = two
[tests/data/spider/testfeed3.rss]
name = three

View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom"/>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<title>Sam Ruby</title>
<subtitle>Its just data</subtitle>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<updated>2006-06-16T20:15:18-04:00</updated>
<link href="http://www.intertwingly.net/blog/"/>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
<link href="http://example.com/1"/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
<link href="http://example.com/2"/>
<title>Venus</title>
<content>the Morning Star</content>
<updated>2006-01-02T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
<link href="http://example.com/3"/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
<link href="http://example.com/4"/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
</entry>
</feed>

View File

@ -0,0 +1,50 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom"/>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<title>Sam Ruby</title>
<subtitle>Its just data</subtitle>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<updated>2006-06-16T20:15:18-04:00</updated>
<link href="http://www.intertwingly.net/blog/"/>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
<link href="http://example.com/1"/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
<link href="http://example.com/2"/>
<title>Venus</title>
<content>the Jewel of the Sky</content>
<published>2006-01-02T00:00:00Z</published>
<updated>2006-02-02T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
<link href="http://example.com/3"/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
<link href="http://example.com/4"/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
</entry>
</feed>

View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom"/>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<title>Sam Ruby</title>
<subtitle>Its just data</subtitle>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<updated>2006-06-16T20:15:18-04:00</updated>
<link href="http://www.intertwingly.net/blog/"/>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/1</id>
<link href="http://example.com/1"/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/2</id>
<link href="http://example.com/2"/>
<title>Venus</title>
<content>the Morning Star</content>
<updated>2006-01-02T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/3</id>
<link href="http://example.com/3"/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
</entry>
<entry>
<id>tag:planet.intertwingly.net,2006:testfeed2/4</id>
<link href="http://example.com/4"/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
</entry>
</feed>

View File

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>Sam Ruby</title>
<link>http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss</link>
<description>Its just data</description>
<item>
<guid>tag:planet.intertwingly.net,2006:testfeed3/1</guid>
<link href="http://example.com/1"/>
<title>Mercury</title>
<description>Messenger of the Roman Gods</description>
<pubDate>Sun, 01 Jan 2006 00:00:00 +0000</pubDate>
</item>
<item>
<guid>tag:planet.intertwingly.net,2006:testfeed3/2</guid>
<link>http://example.com/2</link>
<title>Venus</title>
<description>the Morning Star</description>
</item>
<item>
<link>http://example.com/3</link>
<title>Earth</title>
<description>the Blue Planet</description>
<pubDate>Tue, 03 Jan 2006 00:00:00 +0000</pubDate>
</item>
<entry>
<link href="http://example.com/4"/>
<title>Mars</title>
<description>the Red Planet</description>
</entry>
</feed>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,23 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<content>the Jewel of the Sky</content>
<updated>2006-02-02T00:00:00Z</updated>
<published>2006-01-02T00:00:00Z</published>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated>2006-01-01T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<content>the Morning Star</content>
<updated>2006-01-02T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated>2006-01-03T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,22 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<content>the Red Planet</content>
<updated>2006-01-04T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>

View File

@ -0,0 +1,11 @@
[Planet]
name = test planet
cache_directory = tests/data/splice/cache
template_files =
[tests/data/spider/testfeed1b.atom]
name = one
[tests/data/spider/testfeed2.atom]
name = two

52
tests/test_config.py Normal file
View File

@ -0,0 +1,52 @@
#!/usr/bin/env python
"""Unit tests for planet.config.

Exercises the option accessors (template, feed, planet-wide, per-template
and per-feed) against the fixture file tests/data/config/basic.ini.
"""
import unittest

from planet import config

workdir = 'tests/work/spider/cache'


class ConfigTest(unittest.TestCase):
    def setUp(self):
        # every test runs against a freshly loaded configuration
        config.load('tests/data/config/basic.ini')

    # administrivia

    def test_template(self):
        self.assertEqual(['index.html.tmpl', 'atom.xml.tmpl'],
            config.template_files())

    def test_feeds(self):
        self.assertEqual(['feed1', 'feed2'], config.feeds())

    # planet wide configuration

    def test_name(self):
        self.assertEqual('Test Configuration', config.name())

    def test_link(self):
        self.assertEqual('Unconfigured Planet', config.link())

    # per template configuration

    def test_days_per_page(self):
        self.assertEqual(7, config.days_per_page('index.html.tmpl'))
        self.assertEqual(0, config.days_per_page('atom.xml.tmpl'))

    def test_items_per_page(self):
        self.assertEqual(50, config.items_per_page('index.html.tmpl'))
        self.assertEqual(50, config.items_per_page('atom.xml.tmpl'))

    def test_encoding(self):
        self.assertEqual('utf-8', config.encoding('index.html.tmpl'))
        self.assertEqual('utf-8', config.encoding('atom.xml.tmpl'))

    # dictionaries

    def test_feed_options(self):
        self.assertEqual('one', config.feed_options('feed1')['name'])
        self.assertEqual('two', config.feed_options('feed2')['name'])

    def test_template_options(self):
        option = config.template_options('index.html.tmpl')
        self.assertEqual('7', option['days_per_page'])
        self.assertEqual('50', option['items_per_page'])

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
import unittest, os, sys, glob, new, re, StringIO, time
from planet import feedparser
from planet.reconstitute import reconstitute
testfiles = 'tests/data/reconstitute/%s.xml'
class ReconstituteTest(unittest.TestCase):
desc_re = re.compile("Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->")
def eval(self, name):
# read the test case
try:
testcase = open(testfiles % name)
data = testcase.read()
description, expect = self.desc_re.search(data).groups()
testcase.close()
except:
raise RuntimeError, "can't parse %s" % name
# parse and reconstitute to a string
work = StringIO.StringIO()
results = feedparser.parse(data)
reconstitute(results, results.entries[0]).writexml(work)
# verify the results
results = feedparser.parse(work.getvalue().encode('utf-8'))
self.assertFalse(results.bozo, 'xml is well formed')
self.assertTrue(eval(expect, results.entries[0]), expect)
# build a test method for each test file
for testcase in glob.glob(testfiles % '*'):
root = os.path.splitext(os.path.basename(testcase))[0]
func = lambda self, name=root: self.eval(name)
method = new.instancemethod(func, None, ReconstituteTest)
setattr(ReconstituteTest, "test_" + root, method)

65
tests/test_spider.py Normal file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
"""Unit tests for planet.spider: cache-file naming and feed spidering."""
import unittest, os, glob, calendar

from planet.spider import filename, spiderFeed, spiderPlanet
from planet import feedparser, config

workdir = 'tests/work/spider/cache'
testfeed = 'tests/data/spider/testfeed%s.atom'
testfeed = 'tests/data/spider/testfeed%s.atom'
configfile = 'tests/data/spider/config.ini'


class SpiderTest(unittest.TestCase):
    def setUp(self):
        # if a previous run left the work directory behind, clear it
        # out and start over
        try:
            os.makedirs(workdir)
        except:
            self.tearDown()
            os.makedirs(workdir)

    def tearDown(self):
        for file in glob.glob(workdir + "/*"):
            os.unlink(file)
        os.removedirs(workdir)

    def test_filename(self):
        self.assertEqual('./example.com,index.html',
            filename('.', 'http://example.com/index.html'))
        self.assertEqual('./www.xn--8ws00zhy3a.com',
            filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))

    def test_spiderFeed(self):
        config.load(configfile)
        spiderFeed(testfeed % '1b')
        files = glob.glob(workdir + "/*")

        # verify that exactly four files were produced
        self.assertEqual(4, len(files))

        # verify that the file names are as expected
        self.assertTrue(workdir +
            '/tag:planet.intertwingly.net,2006:testfeed1,1' in files)

        # verify that the file timestamps match atom:updated
        for file in files:
            data = feedparser.parse(file)
            self.assertTrue(data.entries[0].source.planet_name)
            self.assertEqual(os.stat(file).st_mtime,
                calendar.timegm(data.entries[0].updated_parsed))

    def test_spiderUpdate(self):
        # spidering the updated feed on top of the original must yield
        # the same end state as spidering the updated feed alone
        spiderFeed(testfeed % '1a')
        self.test_spiderFeed()

    def test_spiderPlanet(self):
        spiderPlanet(configfile)
        files = glob.glob(workdir + "/*")

        # verify that exactly twelve files were produced
        self.assertEqual(12, len(files))

        # verify that the file names are as expected
        self.assertTrue(workdir +
            '/tag:planet.intertwingly.net,2006:testfeed1,1' in files)
        self.assertTrue(workdir +
            '/tag:planet.intertwingly.net,2006:testfeed2,1' in files)

17
tests/test_splice.py Normal file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
import unittest
from planet.splice import splice
configfile = 'tests/data/splice/config.ini'
class SpliceTest(unittest.TestCase):
def test_splice(self):
doc = splice(configfile)
self.assertEqual(8,len(doc.getElementsByTagName('entry')))
self.assertEqual(2,len(doc.getElementsByTagName('planet:subscription')))
self.assertEqual(10,len(doc.getElementsByTagName('planet:name')))
self.assertEqual('test planet',
doc.getElementsByTagName('title')[0].firstChild.nodeValue)