Initial load
This commit is contained in:
commit
b31973d514
BIN
examples/images/feed-icon-10x10.png
Normal file
BIN
examples/images/feed-icon-10x10.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 469 B |
BIN
examples/images/logo.png
Normal file
BIN
examples/images/logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.3 KiB |
150
examples/planet.css
Normal file
150
examples/planet.css
Normal file
@ -0,0 +1,150 @@
|
||||
/* Planet aggregator page style.
 * Layout: a single main column with a fixed-width (200px) absolutely
 * positioned sidebar on the right; the body's margin-right reserves
 * the space the sidebar overlays. */

body {
    border-right: 1px solid black;
    margin-right: 200px;

    padding-left: 20px;
    padding-right: 20px;
}

h1 {
    margin-top: 0px;
    padding-top: 20px;

    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    letter-spacing: -2px;
    text-transform: lowercase;
    text-align: right;

    color: grey;
}

.admin {
    text-align: right;
}

h2 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    color: #200080;

    /* pulls date headings out into the left padding gutter */
    margin-left: -20px;
}

h3 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;

    background-color: #a0c0ff;
    border: 1px solid #5080b0;

    padding: 4px;
}

h3 a {
    text-decoration: none;
    color: inherit;
}

h4 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: bold;
}

h4 a {
    text-decoration: none;
    color: inherit;
}

img.face {
    float: right;
    margin-top: -3em;
}

.entry {
    margin-bottom: 2em;
}

.entry .date {
    font-family: "Bitstream Vera Sans", sans-serif;
    color: grey;
}

.entry .date a {
    text-decoration: none;
    color: inherit;
}

.sidebar {
    position: absolute;
    top: 0px;
    right: 0px;
    width: 200px;

    margin-left: 0px;
    margin-right: 0px;
    padding-right: 0px;

    padding-top: 20px;
    padding-left: 0px;

    font-family: "Bitstream Vera Sans", sans-serif;
    font-size: 85%;
}

.sidebar h2 {
    font-size: 110%;
    font-weight: bold;
    color: black;

    padding-left: 5px;
    margin-left: 0px;
}

.sidebar ul {
    padding-left: 1em;
    margin-left: 0px;

    list-style-type: none;
}

.sidebar ul li:hover {
    color: grey;
}

.sidebar ul li a {
    text-decoration: none;
}

.sidebar ul li a:hover {
    text-decoration: underline;
}

.sidebar ul li a img {
    border: 0;
}

.sidebar p {
    border-top: 1px solid grey;
    margin-top: 30px;
    padding-top: 10px;

    padding-left: 5px;
}

/* "message" entries are tooltip-style notices (e.g. feed fetch errors) */
.sidebar .message {
    cursor: help;
    border-bottom: 1px dashed red;
}

.sidebar a.message:hover {
    cursor: help;
    background-color: #ff0000;
    color: #ffffff !important;
    text-decoration: none !important;
}

a:hover {
    text-decoration: underline !important;
    color: blue !important;
}
|
65
examples/planet.xslt
Normal file
65
examples/planet.xslt
Normal file
@ -0,0 +1,65 @@
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
                xmlns:atom="http://www.w3.org/2005/Atom"
                xmlns:planet="http://planet.intertwingly.net/"
                xmlns="http://www.w3.org/1999/xhtml">

  <!-- Page template: renders the aggregate Atom feed as an XHTML page
       with the entries in document order and a sidebar listing the
       subscriptions (planet:subscription extension elements). -->
  <xsl:template match="atom:feed">
    <html xmlns="http://www.w3.org/1999/xhtml">
      <head>
        <link rel="stylesheet" href="planet.css" type="text/css" />
        <title><xsl:value-of select="atom:title"/></title>
      </head>
      <body>
        <h1><xsl:value-of select="atom:title"/></h1>

        <xsl:apply-templates select="atom:entry"/>

        <div class="sidebar">
          <img src="images/logo.png" width="136" height="136" alt=""/>

          <h2>Subscriptions</h2>
          <ul>
            <!-- subscriptions sorted alphabetically by their planet:name -->
            <xsl:for-each select="planet:subscription">
              <xsl:sort select="planet:name"/>
              <li>
                <a href="{atom:link[@rel='self']/@href}" title="subscribe">
                  <img src="images/feed-icon-10x10.png" alt="(feed)"/>
                </a>
                <xsl:value-of select="planet:name"/>
              </li>
            </xsl:for-each>
          </ul>
        </div>
      </body>
    </html>
  </xsl:template>

  <!-- Entry template: emits a date heading before the first entry of each
       day (detected by comparing against preceding siblings), then the
       source/title heading and the entry body. -->
  <xsl:template match="atom:entry">
    <xsl:variable name="date" select="substring(atom:updated,1,10)"/>
    <xsl:if test="not(preceding-sibling::atom:entry
        [substring(atom:updated,1,10) = $date])">
      <h2 class="date"><xsl:value-of select="$date"/></h2>
    </xsl:if>

    <h3>
      <a href="{atom:source/atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:source/planet:name"/>
      </a>
      —
      <a href="{atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:title"/>
      </a>
    </h3>

    <div class="content">
      <!-- NOTE(review): copy-of select="…/*" copies only element children;
           entries whose atom:content/atom:summary is plain text (no child
           elements) would render empty — confirm whether upstream always
           produces xhtml-typed content. -->
      <xsl:choose>
        <xsl:when test="atom:content">
          <p><xsl:copy-of select="atom:content/*"/></p>
        </xsl:when>
        <xsl:otherwise>
          <p><xsl:copy-of select="atom:summary/*"/></p>
        </xsl:otherwise>
      </xsl:choose>
    </div>
  </xsl:template>
</xsl:stylesheet>
|
1824
planet/BeautifulSoup.py
Normal file
1824
planet/BeautifulSoup.py
Normal file
File diff suppressed because it is too large
Load Diff
45
planet/__init__.py
Normal file
45
planet/__init__.py
Normal file
@ -0,0 +1,45 @@
|
||||
# Module-level singleton: the one shared "planet.runner" logger.
logger = None

def getLogger(level):
    """ get a logger with the specified log level """
    # First call wins: later calls return the cached logger unchanged,
    # even if they pass a different level.
    global logger
    if logger: return logger

    try:
        import logging
    except ImportError:
        # Pythons older than 2.3 have no stdlib logging module;
        # fall back to the bundled copy.  (Narrowed from a bare except.)
        import compat_logging as logging

    logging.basicConfig()
    # level is a name like "DEBUG"; getLevelName maps it to its numeric value.
    logging.getLogger().setLevel(logging.getLevelName(level))
    logger = logging.getLogger("planet.runner")
    try:
        logger.warning
    except AttributeError:
        # Very old logging modules only provide warn(); alias it so the
        # rest of the code can always call warning().
        logger.warning = logger.warn

    return logger
|
||||
|
||||
|
||||
def setTimeout(timeout):
    """ time out rather than hang forever on ultra-slow servers."""
    # NOTE(review): this relies on the module-global `logger` having been
    # initialized via getLogger() first — confirm callers always do that.
    if timeout:
        try:
            timeout = float(timeout)
        except (TypeError, ValueError):
            # Narrowed from a bare except: only conversion failures are
            # expected here; anything else should propagate.
            logger.warning("Timeout set to invalid value '%s', skipping", timeout)
            timeout = None

    if timeout:
        try:
            # Prefer the bundled timeoutsocket module when present...
            from planet import timeoutsocket
            timeoutsocket.setDefaultSocketTimeout(timeout)
            logger.debug("Socket timeout set to %d seconds", timeout)
        except ImportError:
            # ...otherwise use the socket module's own default-timeout
            # support (available from Python 2.3 on).
            import socket
            if hasattr(socket, 'setdefaulttimeout'):
                logger.debug("timeoutsocket not found, using python function")
                socket.setdefaulttimeout(timeout)
                logger.debug("Socket timeout set to %d seconds", timeout)
            else:
                logger.error("Unable to set timeout to %d seconds", timeout)
|
1196
planet/compat_logging/__init__.py
Normal file
1196
planet/compat_logging/__init__.py
Normal file
File diff suppressed because it is too large
Load Diff
299
planet/compat_logging/config.py
Normal file
299
planet/compat_logging/config.py
Normal file
@ -0,0 +1,299 @@
|
||||
# Copyright 2001-2002 by Vinay Sajip. All Rights Reserved.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose and without fee is hereby granted,
|
||||
# provided that the above copyright notice appear in all copies and that
|
||||
# both that copyright notice and this permission notice appear in
|
||||
# supporting documentation, and that the name of Vinay Sajip
|
||||
# not be used in advertising or publicity pertaining to distribution
|
||||
# of the software without specific, written prior permission.
|
||||
# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
|
||||
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
|
||||
# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
|
||||
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""
|
||||
Logging package for Python. Based on PEP 282 and comments thereto in
|
||||
comp.lang.python, and influenced by Apache's log4j system.
|
||||
|
||||
Should work under Python versions >= 1.5.2, except that source line
|
||||
information is not available unless 'inspect' is.
|
||||
|
||||
Copyright (C) 2001-2002 Vinay Sajip. All Rights Reserved.
|
||||
|
||||
To use, simply 'import logging' and log away!
|
||||
"""
|
||||
|
||||
import sys, logging, logging.handlers, string, thread, threading, socket, struct, os
|
||||
|
||||
from SocketServer import ThreadingTCPServer, StreamRequestHandler
|
||||
|
||||
|
||||
DEFAULT_LOGGING_CONFIG_PORT = 9030
|
||||
if sys.platform == "win32":
|
||||
RESET_ERROR = 10054 #WSAECONNRESET
|
||||
else:
|
||||
RESET_ERROR = 104 #ECONNRESET
|
||||
|
||||
#
|
||||
# The following code implements a socket listener for on-the-fly
|
||||
# reconfiguration of logging.
|
||||
#
|
||||
# _listener holds the server object doing the listening
|
||||
_listener = None
|
||||
|
||||
def fileConfig(fname, defaults=None):
    """
    Read the logging configuration from a ConfigParser-format file.

    This can be called several times from an application, allowing an end user
    the ability to select from various pre-canned configurations (if the
    developer provides a mechanism to present the choices and load the chosen
    configuration).
    In versions of ConfigParser which have the readfp method [typically
    shipped in 2.x versions of Python], you can pass in a file-like object
    rather than a filename, in which case the file-like object will be read
    using readfp.
    """
    import ConfigParser

    cp = ConfigParser.ConfigParser(defaults)
    if hasattr(cp, 'readfp') and hasattr(fname, 'readline'):
        cp.readfp(fname)
    else:
        cp.read(fname)
    #first, do the formatters...
    flist = cp.get("formatters", "keys")
    if len(flist):
        flist = string.split(flist, ",")
        formatters = {}
        for form in flist:
            sectname = "formatter_%s" % form
            opts = cp.options(sectname)
            if "format" in opts:
                # third arg (raw=1) keeps %-placeholders uninterpolated
                fs = cp.get(sectname, "format", 1)
            else:
                fs = None
            if "datefmt" in opts:
                dfs = cp.get(sectname, "datefmt", 1)
            else:
                dfs = None
            f = logging.Formatter(fs, dfs)
            formatters[form] = f
    #next, do the handlers...
    #critical section...
    logging._acquireLock()
    try:
        try:
            #first, lose the existing handlers...
            logging._handlers.clear()
            #now set up the new ones...
            hlist = cp.get("handlers", "keys")
            if len(hlist):
                hlist = string.split(hlist, ",")
                handlers = {}
                fixups = [] #for inter-handler references
                for hand in hlist:
                    sectname = "handler_%s" % hand
                    klass = cp.get(sectname, "class")
                    opts = cp.options(sectname)
                    if "formatter" in opts:
                        fmt = cp.get(sectname, "formatter")
                    else:
                        fmt = ""
                    # SECURITY NOTE: the class name and constructor args are
                    # eval'd from the config file — only ever feed this
                    # function trusted configuration.
                    klass = eval(klass, vars(logging))
                    args = cp.get(sectname, "args")
                    args = eval(args, vars(logging))
                    h = apply(klass, args)
                    if "level" in opts:
                        level = cp.get(sectname, "level")
                        h.setLevel(logging._levelNames[level])
                    if len(fmt):
                        h.setFormatter(formatters[fmt])
                    #temporary hack for FileHandler and MemoryHandler.
                    if klass == logging.handlers.MemoryHandler:
                        if "target" in opts:
                            target = cp.get(sectname,"target")
                        else:
                            target = ""
                        if len(target): #the target handler may not be loaded yet, so keep for later...
                            fixups.append((h, target))
                    handlers[hand] = h
                #now all handlers are loaded, fixup inter-handler references...
                for fixup in fixups:
                    h = fixup[0]
                    t = fixup[1]
                    h.setTarget(handlers[t])
            #at last, the loggers...first the root...
            llist = cp.get("loggers", "keys")
            llist = string.split(llist, ",")
            llist.remove("root")
            sectname = "logger_root"
            root = logging.root
            log = root
            opts = cp.options(sectname)
            if "level" in opts:
                level = cp.get(sectname, "level")
                log.setLevel(logging._levelNames[level])
            for h in root.handlers[:]:
                root.removeHandler(h)
            hlist = cp.get(sectname, "handlers")
            if len(hlist):
                hlist = string.split(hlist, ",")
                for hand in hlist:
                    log.addHandler(handlers[hand])
            #and now the others...
            #we don't want to lose the existing loggers,
            #since other threads may have pointers to them.
            #existing is set to contain all existing loggers,
            #and as we go through the new configuration we
            #remove any which are configured. At the end,
            #what's left in existing is the set of loggers
            #which were in the previous configuration but
            #which are not in the new configuration.
            existing = root.manager.loggerDict.keys()
            #now set up the new ones...
            for log in llist:
                sectname = "logger_%s" % log
                qn = cp.get(sectname, "qualname")
                opts = cp.options(sectname)
                if "propagate" in opts:
                    propagate = cp.getint(sectname, "propagate")
                else:
                    propagate = 1
                logger = logging.getLogger(qn)
                if qn in existing:
                    existing.remove(qn)
                if "level" in opts:
                    level = cp.get(sectname, "level")
                    logger.setLevel(logging._levelNames[level])
                for h in logger.handlers[:]:
                    logger.removeHandler(h)
                logger.propagate = propagate
                logger.disabled = 0
                hlist = cp.get(sectname, "handlers")
                if len(hlist):
                    hlist = string.split(hlist, ",")
                    for hand in hlist:
                        logger.addHandler(handlers[hand])
            #Disable any old loggers. There's no point deleting
            #them as other threads may continue to hold references
            #and by disabling them, you stop them doing any logging.
            for log in existing:
                root.manager.loggerDict[log].disabled = 1
        except:
            # best-effort: report the error on stderr but never let a bad
            # config file take the application down
            import traceback
            ei = sys.exc_info()
            traceback.print_exception(ei[0], ei[1], ei[2], None, sys.stderr)
            del ei
    finally:
        logging._releaseLock()
|
||||
|
||||
def listen(port=DEFAULT_LOGGING_CONFIG_PORT):
|
||||
"""
|
||||
Start up a socket server on the specified port, and listen for new
|
||||
configurations.
|
||||
|
||||
These will be sent as a file suitable for processing by fileConfig().
|
||||
Returns a Thread object on which you can call start() to start the server,
|
||||
and which you can join() when appropriate. To stop the server, call
|
||||
stopListening().
|
||||
"""
|
||||
if not thread:
|
||||
raise NotImplementedError, "listen() needs threading to work"
|
||||
|
||||
class ConfigStreamHandler(StreamRequestHandler):
|
||||
"""
|
||||
Handler for a logging configuration request.
|
||||
|
||||
It expects a completely new logging configuration and uses fileConfig
|
||||
to install it.
|
||||
"""
|
||||
def handle(self):
|
||||
"""
|
||||
Handle a request.
|
||||
|
||||
Each request is expected to be a 4-byte length,
|
||||
followed by the config file. Uses fileConfig() to do the
|
||||
grunt work.
|
||||
"""
|
||||
import tempfile
|
||||
try:
|
||||
conn = self.connection
|
||||
chunk = conn.recv(4)
|
||||
if len(chunk) == 4:
|
||||
slen = struct.unpack(">L", chunk)[0]
|
||||
chunk = self.connection.recv(slen)
|
||||
while len(chunk) < slen:
|
||||
chunk = chunk + conn.recv(slen - len(chunk))
|
||||
#Apply new configuration. We'd like to be able to
|
||||
#create a StringIO and pass that in, but unfortunately
|
||||
#1.5.2 ConfigParser does not support reading file
|
||||
#objects, only actual files. So we create a temporary
|
||||
#file and remove it later.
|
||||
file = tempfile.mktemp(".ini")
|
||||
f = open(file, "w")
|
||||
f.write(chunk)
|
||||
f.close()
|
||||
fileConfig(file)
|
||||
os.remove(file)
|
||||
except socket.error, e:
|
||||
if type(e.args) != types.TupleType:
|
||||
raise
|
||||
else:
|
||||
errcode = e.args[0]
|
||||
if errcode != RESET_ERROR:
|
||||
raise
|
||||
|
||||
class ConfigSocketReceiver(ThreadingTCPServer):
|
||||
"""
|
||||
A simple TCP socket-based logging config receiver.
|
||||
"""
|
||||
|
||||
allow_reuse_address = 1
|
||||
|
||||
def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT,
|
||||
handler=None):
|
||||
ThreadingTCPServer.__init__(self, (host, port), handler)
|
||||
logging._acquireLock()
|
||||
self.abort = 0
|
||||
logging._releaseLock()
|
||||
self.timeout = 1
|
||||
|
||||
def serve_until_stopped(self):
|
||||
import select
|
||||
abort = 0
|
||||
while not abort:
|
||||
rd, wr, ex = select.select([self.socket.fileno()],
|
||||
[], [],
|
||||
self.timeout)
|
||||
if rd:
|
||||
self.handle_request()
|
||||
logging._acquireLock()
|
||||
abort = self.abort
|
||||
logging._releaseLock()
|
||||
|
||||
def serve(rcvr, hdlr, port):
|
||||
server = rcvr(port=port, handler=hdlr)
|
||||
global _listener
|
||||
logging._acquireLock()
|
||||
_listener = server
|
||||
logging._releaseLock()
|
||||
server.serve_until_stopped()
|
||||
|
||||
return threading.Thread(target=serve,
|
||||
args=(ConfigSocketReceiver,
|
||||
ConfigStreamHandler, port))
|
||||
|
||||
def stopListening():
    """
    Stop the listening server which was created with a call to listen().
    """
    global _listener
    # Nothing to do when no listener was ever started.
    if not _listener:
        return
    # Under the logging lock: flag the serve loop to exit on its next
    # poll, and drop the module-level reference to the server.
    logging._acquireLock()
    _listener.abort = 1
    _listener = None
    logging._releaseLock()
|
728
planet/compat_logging/handlers.py
Normal file
728
planet/compat_logging/handlers.py
Normal file
@ -0,0 +1,728 @@
|
||||
# Copyright 2001-2002 by Vinay Sajip. All Rights Reserved.
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose and without fee is hereby granted,
|
||||
# provided that the above copyright notice appear in all copies and that
|
||||
# both that copyright notice and this permission notice appear in
|
||||
# supporting documentation, and that the name of Vinay Sajip
|
||||
# not be used in advertising or publicity pertaining to distribution
|
||||
# of the software without specific, written prior permission.
|
||||
# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
|
||||
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
|
||||
# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
|
||||
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
"""
|
||||
Logging package for Python. Based on PEP 282 and comments thereto in
|
||||
comp.lang.python, and influenced by Apache's log4j system.
|
||||
|
||||
Should work under Python versions >= 1.5.2, except that source line
|
||||
information is not available unless 'inspect' is.
|
||||
|
||||
Copyright (C) 2001-2002 Vinay Sajip. All Rights Reserved.
|
||||
|
||||
To use, simply 'import logging' and log away!
|
||||
"""
|
||||
|
||||
import sys, logging, socket, types, os, string, cPickle, struct, time
|
||||
|
||||
from SocketServer import ThreadingTCPServer, StreamRequestHandler
|
||||
|
||||
#
|
||||
# Some constants...
|
||||
#
|
||||
|
||||
DEFAULT_TCP_LOGGING_PORT = 9020
|
||||
DEFAULT_UDP_LOGGING_PORT = 9021
|
||||
DEFAULT_HTTP_LOGGING_PORT = 9022
|
||||
DEFAULT_SOAP_LOGGING_PORT = 9023
|
||||
SYSLOG_UDP_PORT = 514
|
||||
|
||||
|
||||
class RotatingFileHandler(logging.FileHandler):
    # A FileHandler that caps the log file at maxBytes and keeps up to
    # backupCount numbered backups (app.log.1 is always the newest).
    def __init__(self, filename, mode="a", maxBytes=0, backupCount=0):
        """
        Open the specified file and use it as the stream for logging.

        By default, the file grows indefinitely. You can specify particular
        values of maxBytes and backupCount to allow the file to rollover at
        a predetermined size.

        Rollover occurs whenever the current log file is nearly maxBytes in
        length. If backupCount is >= 1, the system will successively create
        new files with the same pathname as the base file, but with extensions
        ".1", ".2" etc. appended to it. For example, with a backupCount of 5
        and a base file name of "app.log", you would get "app.log",
        "app.log.1", "app.log.2", ... through to "app.log.5". The file being
        written to is always "app.log" - when it gets filled up, it is closed
        and renamed to "app.log.1", and if files "app.log.1", "app.log.2" etc.
        exist, then they are renamed to "app.log.2", "app.log.3" etc.
        respectively.

        If maxBytes is zero, rollover never occurs.
        """
        logging.FileHandler.__init__(self, filename, mode)
        self.maxBytes = maxBytes
        self.backupCount = backupCount
        if maxBytes > 0:
            # rollover only makes sense in append mode; override whatever
            # mode the caller asked for
            self.mode = "a"

    def doRollover(self):
        """
        Do a rollover, as described in __init__().
        """

        self.stream.close()
        if self.backupCount > 0:
            # shift existing backups up by one, oldest first, so .1 is free
            for i in range(self.backupCount - 1, 0, -1):
                sfn = "%s.%d" % (self.baseFilename, i)
                dfn = "%s.%d" % (self.baseFilename, i + 1)
                if os.path.exists(sfn):
                    #print "%s -> %s" % (sfn, dfn)
                    if os.path.exists(dfn):
                        os.remove(dfn)
                    os.rename(sfn, dfn)
            dfn = self.baseFilename + ".1"
            if os.path.exists(dfn):
                os.remove(dfn)
            os.rename(self.baseFilename, dfn)
            #print "%s -> %s" % (self.baseFilename, dfn)
        # reopen (truncating) the base file; with backupCount == 0 this
        # simply discards the old contents
        self.stream = open(self.baseFilename, "w")

    def emit(self, record):
        """
        Emit a record.

        Output the record to the file, catering for rollover as described
        in doRollover().
        """
        if self.maxBytes > 0:                   # are we rolling over?
            msg = "%s\n" % self.format(record)
            self.stream.seek(0, 2)  #due to non-posix-compliant Windows feature
            if self.stream.tell() + len(msg) >= self.maxBytes:
                self.doRollover()
        logging.FileHandler.emit(self, record)
|
||||
|
||||
|
||||
class SocketHandler(logging.Handler):
    """
    A handler class which writes logging records, in pickle format, to
    a streaming socket. The socket is kept open across logging calls.
    If the peer resets it, an attempt is made to reconnect on the next call.
    The pickle which is sent is that of the LogRecord's attribute dictionary
    (__dict__), so that the receiver does not need to have the logging module
    installed in order to process the logging event.

    To unpickle the record at the receiving end into a LogRecord, use the
    makeLogRecord function.
    """

    def __init__(self, host, port):
        """
        Initializes the handler with a specific host address and port.

        The attribute 'closeOnError' is set to 1 - which means that if
        a socket error occurs, the socket is silently closed and then
        reopened on the next logging call.
        """
        logging.Handler.__init__(self)
        self.host = host
        self.port = port
        # lazily created on first emit(); None means "not connected"
        self.sock = None
        # NOTE(review): docstring above says closeOnError is set to 1, but
        # the code sets 0 — so errors go to Handler.handleError by default.
        self.closeOnError = 0

    def makeSocket(self):
        """
        A factory method which allows subclasses to define the precise
        type of socket they want.
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect((self.host, self.port))
        return s

    def send(self, s):
        """
        Send a pickled string to the socket.

        This function allows for partial sends which can happen when the
        network is busy.
        """
        if hasattr(self.sock, "sendall"):
            self.sock.sendall(s)
        else:
            # manual sendall loop for sockets lacking sendall (pre-2.0)
            sentsofar = 0
            left = len(s)
            while left > 0:
                sent = self.sock.send(s[sentsofar:])
                sentsofar = sentsofar + sent
                left = left - sent

    def makePickle(self, record):
        """
        Pickles the record in binary format with a length prefix, and
        returns it ready for transmission across the socket.
        """
        s = cPickle.dumps(record.__dict__, 1)
        #n = len(s)
        #slen = "%c%c" % ((n >> 8) & 0xFF, n & 0xFF)
        # 4-byte big-endian length prefix, matching the receiver's framing
        slen = struct.pack(">L", len(s))
        return slen + s

    def handleError(self, record):
        """
        Handle an error during logging.

        An error has occurred during logging. Most likely cause -
        connection lost. Close the socket so that we can retry on the
        next event.
        """
        if self.closeOnError and self.sock:
            self.sock.close()
            self.sock = None        #try to reconnect next time
        else:
            logging.Handler.handleError(self, record)

    def emit(self, record):
        """
        Emit a record.

        Pickles the record and writes it to the socket in binary format.
        If there is an error with the socket, silently drop the packet.
        If there was a problem with the socket, re-establishes the
        socket.
        """
        try:
            s = self.makePickle(record)
            if not self.sock:
                self.sock = self.makeSocket()
            self.send(s)
        except:
            # any failure is delegated to handleError rather than raised,
            # so logging never takes the application down
            self.handleError(record)

    def close(self):
        """
        Closes the socket.
        """
        if self.sock:
            self.sock.close()
            self.sock = None
|
||||
|
||||
class DatagramHandler(SocketHandler):
    """
    A SocketHandler variant that ships each pickled LogRecord attribute
    dictionary (__dict__) over a UDP socket instead of a TCP stream, so
    the receiving side can rebuild the event without having the logging
    module installed.

    Use the makeLogRecord function to turn a received pickle back into a
    LogRecord.

    """
    def __init__(self, host, port):
        """
        Initializes the handler with a specific host address and port.
        """
        SocketHandler.__init__(self, host, port)
        self.closeOnError = 0

    def makeSocket(self):
        """
        Override the SocketHandler factory to hand back a UDP
        (SOCK_DGRAM) socket instead of a stream socket.
        """
        return socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    def send(self, s):
        """
        Ship the pickled string in one datagram.

        Unlike the stream version there is no partial-send loop: UDP
        neither guarantees delivery nor ordering, so the payload goes
        out in a single sendto call.
        """
        self.sock.sendto(s, (self.host, self.port))
|
||||
|
||||
class SysLogHandler(logging.Handler):
|
||||
"""
|
||||
A handler class which sends formatted logging records to a syslog
|
||||
server. Based on Sam Rushing's syslog module:
|
||||
http://www.nightmare.com/squirl/python-ext/misc/syslog.py
|
||||
Contributed by Nicolas Untz (after which minor refactoring changes
|
||||
have been made).
|
||||
"""
|
||||
|
||||
# from <linux/sys/syslog.h>:
|
||||
# ======================================================================
|
||||
# priorities/facilities are encoded into a single 32-bit quantity, where
|
||||
# the bottom 3 bits are the priority (0-7) and the top 28 bits are the
|
||||
# facility (0-big number). Both the priorities and the facilities map
|
||||
# roughly one-to-one to strings in the syslogd(8) source code. This
|
||||
# mapping is included in this file.
|
||||
#
|
||||
# priorities (these are ordered)
|
||||
|
||||
LOG_EMERG = 0 # system is unusable
|
||||
LOG_ALERT = 1 # action must be taken immediately
|
||||
LOG_CRIT = 2 # critical conditions
|
||||
LOG_ERR = 3 # error conditions
|
||||
LOG_WARNING = 4 # warning conditions
|
||||
LOG_NOTICE = 5 # normal but significant condition
|
||||
LOG_INFO = 6 # informational
|
||||
LOG_DEBUG = 7 # debug-level messages
|
||||
|
||||
# facility codes
|
||||
LOG_KERN = 0 # kernel messages
|
||||
LOG_USER = 1 # random user-level messages
|
||||
LOG_MAIL = 2 # mail system
|
||||
LOG_DAEMON = 3 # system daemons
|
||||
LOG_AUTH = 4 # security/authorization messages
|
||||
LOG_SYSLOG = 5 # messages generated internally by syslogd
|
||||
LOG_LPR = 6 # line printer subsystem
|
||||
LOG_NEWS = 7 # network news subsystem
|
||||
LOG_UUCP = 8 # UUCP subsystem
|
||||
LOG_CRON = 9 # clock daemon
|
||||
LOG_AUTHPRIV = 10 # security/authorization messages (private)
|
||||
|
||||
# other codes through 15 reserved for system use
|
||||
LOG_LOCAL0 = 16 # reserved for local use
|
||||
LOG_LOCAL1 = 17 # reserved for local use
|
||||
LOG_LOCAL2 = 18 # reserved for local use
|
||||
LOG_LOCAL3 = 19 # reserved for local use
|
||||
LOG_LOCAL4 = 20 # reserved for local use
|
||||
LOG_LOCAL5 = 21 # reserved for local use
|
||||
LOG_LOCAL6 = 22 # reserved for local use
|
||||
LOG_LOCAL7 = 23 # reserved for local use
|
||||
|
||||
priority_names = {
|
||||
"alert": LOG_ALERT,
|
||||
"crit": LOG_CRIT,
|
||||
"critical": LOG_CRIT,
|
||||
"debug": LOG_DEBUG,
|
||||
"emerg": LOG_EMERG,
|
||||
"err": LOG_ERR,
|
||||
"error": LOG_ERR, # DEPRECATED
|
||||
"info": LOG_INFO,
|
||||
"notice": LOG_NOTICE,
|
||||
"panic": LOG_EMERG, # DEPRECATED
|
||||
"warn": LOG_WARNING, # DEPRECATED
|
||||
"warning": LOG_WARNING,
|
||||
}
|
||||
|
||||
facility_names = {
|
||||
"auth": LOG_AUTH,
|
||||
"authpriv": LOG_AUTHPRIV,
|
||||
"cron": LOG_CRON,
|
||||
"daemon": LOG_DAEMON,
|
||||
"kern": LOG_KERN,
|
||||
"lpr": LOG_LPR,
|
||||
"mail": LOG_MAIL,
|
||||
"news": LOG_NEWS,
|
||||
"security": LOG_AUTH, # DEPRECATED
|
||||
"syslog": LOG_SYSLOG,
|
||||
"user": LOG_USER,
|
||||
"uucp": LOG_UUCP,
|
||||
"local0": LOG_LOCAL0,
|
||||
"local1": LOG_LOCAL1,
|
||||
"local2": LOG_LOCAL2,
|
||||
"local3": LOG_LOCAL3,
|
||||
"local4": LOG_LOCAL4,
|
||||
"local5": LOG_LOCAL5,
|
||||
"local6": LOG_LOCAL6,
|
||||
"local7": LOG_LOCAL7,
|
||||
}
|
||||
|
||||
def __init__(self, address=('localhost', SYSLOG_UDP_PORT), facility=LOG_USER):
|
||||
"""
|
||||
Initialize a handler.
|
||||
|
||||
If address is specified as a string, UNIX socket is used.
|
||||
If facility is not specified, LOG_USER is used.
|
||||
"""
|
||||
logging.Handler.__init__(self)
|
||||
|
||||
self.address = address
|
||||
self.facility = facility
|
||||
if type(address) == types.StringType:
|
||||
self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
|
||||
# syslog may require either DGRAM or STREAM sockets
|
||||
try:
|
||||
self.socket.connect(address)
|
||||
except socket.error:
|
||||
self.socket.close()
|
||||
self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||
self.socket.connect(address)
|
||||
self.unixsocket = 1
|
||||
else:
|
||||
self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
self.unixsocket = 0
|
||||
|
||||
self.formatter = None
|
||||
|
||||
# curious: when talking to the unix-domain '/dev/log' socket, a
|
||||
# zero-terminator seems to be required. this string is placed
|
||||
# into a class variable so that it can be overridden if
|
||||
# necessary.
|
||||
log_format_string = '<%d>%s\000'
|
||||
|
||||
def encodePriority (self, facility, priority):
    """
    Encode the facility and priority into a single syslog PRI value.

    Either argument may be given as a string, in which case it is
    looked up in the facility_names / priority_names class dictionaries,
    or as an integer, which is used as-is.
    """
    if type(priority) == types.StringType:
        priority = self.priority_names[priority]
    if type(facility) == types.StringType:
        facility = self.facility_names[facility]
    # PRI = facility * 8 + severity, per the syslog protocol
    return priority | (facility << 3)
|
||||
|
||||
def close (self):
    """
    Closes the socket.
    """
    # Previously the socket was only closed in the UNIX-domain case,
    # leaking the UDP socket's file descriptor for remote-syslog
    # handlers.  Close it unconditionally: both branches of __init__
    # always create a socket.
    self.socket.close()
|
||||
|
||||
def emit(self, record):
    """
    Emit a record.

    The record is formatted, and then sent to the syslog server. If
    exception information is present, it is NOT sent to the server.
    """
    msg = self.format(record)
    # The priority lookup table is keyed by lowercase names, while
    # record.levelname is uppercase, so convert.  (Maybe this will
    # change in the future.)  Previously this explanation was written
    # as a bare triple-quoted string, which is an executable expression
    # statement, not a comment.
    msg = self.log_format_string % (
        self.encodePriority(self.facility,
                            string.lower(record.levelname)),
        msg)
    try:
        if self.unixsocket:
            self.socket.send(msg)
        else:
            self.socket.sendto(msg, self.address)
    except:
        # standard logging-handler convention: never let logging errors
        # propagate into the application
        self.handleError(record)
|
||||
|
||||
class SMTPHandler(logging.Handler):
    """
    A handler class which sends an SMTP email for each logging event.
    """
    def __init__(self, mailhost, fromaddr, toaddrs, subject):
        """
        Initialize the handler.

        Initialize the instance with the from and to addresses and subject
        line of the email. To specify a non-standard SMTP port, use the
        (host, port) tuple format for the mailhost argument.
        """
        logging.Handler.__init__(self)
        if type(mailhost) == types.TupleType:
            host, port = mailhost
            self.mailhost = host
            self.mailport = port
        else:
            self.mailhost = mailhost
            self.mailport = None        # resolved to smtplib.SMTP_PORT in emit()
        self.fromaddr = fromaddr
        # accept a single address as a plain string
        if type(toaddrs) == types.StringType:
            toaddrs = [toaddrs]
        self.toaddrs = toaddrs
        self.subject = subject

    def getSubject(self, record):
        """
        Determine the subject for the email.

        If you want to specify a subject line which is record-dependent,
        override this method.
        """
        return self.subject

    # name tables for hand-rolled RFC-style date formatting (see date_time)
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def date_time(self):
        """Return the current date and time formatted for a MIME header."""
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(time.time())
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def emit(self, record):
        """
        Emit a record.

        Format the record and send it to the specified addressees.
        """
        try:
            import smtplib
            port = self.mailport
            if not port:
                port = smtplib.SMTP_PORT
            smtp = smtplib.SMTP(self.mailhost, port)
            msg = self.format(record)
            # build a minimal RFC 2822 message by hand
            msg = "From: %s\r\nTo: %s\r\nSubject: %s\r\nDate: %s\r\n\r\n%s" % (
                            self.fromaddr,
                            string.join(self.toaddrs, ","),
                            self.getSubject(record),
                            self.date_time(), msg)
            smtp.sendmail(self.fromaddr, self.toaddrs, msg)
            smtp.quit()
        except:
            self.handleError(record)
|
||||
|
||||
class NTEventLogHandler(logging.Handler):
    """
    A handler class which sends events to the NT Event Log. Adds a
    registry entry for the specified application name. If no dllname is
    provided, win32service.pyd (which contains some basic message
    placeholders) is used. Note that use of these placeholders will make
    your event logs big, as the entire message source is held in the log.
    If you want slimmer logs, you have to pass in the name of your own DLL
    which contains the message definitions you want to use in the event log.
    """
    def __init__(self, appname, dllname=None, logtype="Application"):
        logging.Handler.__init__(self)
        try:
            import win32evtlogutil, win32evtlog
            self.appname = appname
            self._welu = win32evtlogutil
            if not dllname:
                # default to win32service.pyd, which lives one directory
                # above win32evtlogutil's package directory
                dllname = os.path.split(self._welu.__file__)
                dllname = os.path.split(dllname[0])
                dllname = os.path.join(dllname[0], r'win32service.pyd')
            self.dllname = dllname
            self.logtype = logtype
            self._welu.AddSourceToRegistry(appname, dllname, logtype)
            self.deftype = win32evtlog.EVENTLOG_ERROR_TYPE
            # logging level -> NT event type; consulted by getEventType()
            self.typemap = {
                logging.DEBUG   : win32evtlog.EVENTLOG_INFORMATION_TYPE,
                logging.INFO    : win32evtlog.EVENTLOG_INFORMATION_TYPE,
                logging.WARNING : win32evtlog.EVENTLOG_WARNING_TYPE,
                logging.ERROR   : win32evtlog.EVENTLOG_ERROR_TYPE,
                logging.CRITICAL: win32evtlog.EVENTLOG_ERROR_TYPE,
                }
        except ImportError:
            # degrade gracefully on non-Windows / missing pywin32;
            # emit() checks self._welu before doing anything
            print "The Python Win32 extensions for NT (service, event "\
                  "logging) appear not to be available."
            self._welu = None

    def getMessageID(self, record):
        """
        Return the message ID for the event record. If you are using your
        own messages, you could do this by having the msg passed to the
        logger being an ID rather than a formatting string. Then, in here,
        you could use a dictionary lookup to get the message ID. This
        version returns 1, which is the base message ID in win32service.pyd.
        """
        return 1

    def getEventCategory(self, record):
        """
        Return the event category for the record.

        Override this if you want to specify your own categories. This version
        returns 0.
        """
        return 0

    def getEventType(self, record):
        """
        Return the event type for the record.

        Override this if you want to specify your own types. This version does
        a mapping using the handler's typemap attribute, which is set up in
        __init__() to a dictionary which contains mappings for DEBUG, INFO,
        WARNING, ERROR and CRITICAL. If you are using your own levels you will
        either need to override this method or place a suitable dictionary in
        the handler's typemap attribute.
        """
        return self.typemap.get(record.levelno, self.deftype)

    def emit(self, record):
        """
        Emit a record.

        Determine the message ID, event category and event type. Then
        log the message in the NT event log.
        """
        if self._welu:
            try:
                id = self.getMessageID(record)
                cat = self.getEventCategory(record)
                type = self.getEventType(record)
                msg = self.format(record)
                self._welu.ReportEvent(self.appname, id, cat, type, [msg])
            except:
                self.handleError(record)

    def close(self):
        """
        Clean up this handler.

        You can remove the application name from the registry as a
        source of event log entries. However, if you do this, you will
        not be able to see the events as you intended in the Event Log
        Viewer - it needs to be able to access the registry to get the
        DLL name.
        """
        #self._welu.RemoveSourceFromRegistry(self.appname, self.logtype)
        pass
|
||||
|
||||
class HTTPHandler(logging.Handler):
    """
    A class which sends records to a Web server, using either GET or
    POST semantics.
    """
    def __init__(self, host, url, method="GET"):
        """
        Initialize the instance with the host, the request URL, and the method
        ("GET" or "POST")
        """
        logging.Handler.__init__(self)
        method = string.upper(method)
        if method not in ["GET", "POST"]:
            raise ValueError, "method must be GET or POST"
        self.host = host
        self.url = url
        self.method = method

    def mapLogRecord(self, record):
        """
        Default implementation of mapping the log record into a dict
        that is sent as the CGI data. Overwrite in your class.
        Contributed by Franz Glasner.
        """
        return record.__dict__

    def emit(self, record):
        """
        Emit a record.

        Send the record to the Web server as an URL-encoded dictionary
        """
        try:
            import httplib, urllib
            h = httplib.HTTP(self.host)
            url = self.url
            data = urllib.urlencode(self.mapLogRecord(record))
            if self.method == "GET":
                # append the record data to the query string, respecting
                # any query string already present in the URL
                if (string.find(url, '?') >= 0):
                    sep = '&'
                else:
                    sep = '?'
                url = url + "%c%s" % (sep, data)
            h.putrequest(self.method, url)
            if self.method == "POST":
                h.putheader("Content-length", str(len(data)))
            h.endheaders()
            if self.method == "POST":
                h.send(data)
            h.getreply()    #can't do anything with the result
        except:
            self.handleError(record)
|
||||
|
||||
class BufferingHandler(logging.Handler):
    """
    A logging handler that accumulates records in an in-memory buffer.

    After every record is appended, shouldFlush() is consulted; when it
    answers true, flush() is invoked to dispose of the buffered records.
    """
    def __init__(self, capacity):
        """Set up an empty buffer that flushes at *capacity* records."""
        logging.Handler.__init__(self)
        self.capacity = capacity
        self.buffer = []

    def shouldFlush(self, record):
        """
        Decide whether the buffer needs flushing.

        The default policy is simply "buffer has reached capacity";
        subclasses may override for smarter strategies.
        """
        return (len(self.buffer) >= self.capacity)

    def emit(self, record):
        """
        Buffer the record, then flush if shouldFlush() says so.
        """
        self.buffer.append(record)
        if self.shouldFlush(record):
            self.flush()

    def flush(self):
        """
        Discard the buffered records; override for real flushing behaviour.
        """
        self.buffer = []
|
||||
|
||||
class MemoryHandler(BufferingHandler):
    """
    A handler class which buffers logging records in memory, periodically
    flushing them to a target handler. Flushing occurs whenever the buffer
    is full, or when an event of a certain severity or greater is seen.
    """
    def __init__(self, capacity, flushLevel=logging.ERROR, target=None):
        """
        Initialize the handler with the buffer size, the level at which
        flushing should occur and an optional target.

        Note that without a target being set either here or via setTarget(),
        a MemoryHandler is no use to anyone!
        """
        BufferingHandler.__init__(self, capacity)
        self.flushLevel = flushLevel
        self.target = target

    def shouldFlush(self, record):
        """
        Check for buffer full or a record at the flushLevel or higher.
        """
        return (len(self.buffer) >= self.capacity) or \
               (record.levelno >= self.flushLevel)

    def setTarget(self, target):
        """
        Set the target handler for this handler.
        """
        self.target = target

    def flush(self):
        """
        For a MemoryHandler, flushing means just sending the buffered
        records to the target, if there is one. Override if you want
        different behaviour.
        """
        if self.target:
            for record in self.buffer:
                self.target.handle(record)
            # NOTE(review): the buffer is only cleared when a target is
            # set; with no target, records keep accumulating — confirm
            # this is the intended semantics.
            self.buffer = []

    def close(self):
        """
        Flush, set the target to None and lose the buffer.
        """
        self.flush()
        self.target = None
        self.buffer = []
|
112
planet/config.py
Normal file
112
planet/config.py
Normal file
@ -0,0 +1,112 @@
|
||||
"""
|
||||
Planet Configuration
|
||||
|
||||
This module encapsulates all planet configuration. This is not a generic
|
||||
configuration parser, it knows everything about configuring a planet - from
|
||||
the structure of the ini file, to knowledge of data types, even down to
|
||||
what are the defaults.
|
||||
|
||||
Usage:
|
||||
from planet import config
|
||||
config.load('config.ini')
|
||||
|
||||
# administrative / structural information
|
||||
print config.templates()
|
||||
print config.feeds()
|
||||
|
||||
# planet wide configuration
|
||||
print config.name()
|
||||
print config.link()
|
||||
|
||||
# per template configuration
|
||||
print config.days_per_page('atom.xml.tmpl')
|
||||
print config.encoding('index.html.tmpl')
|
||||
|
||||
Todo:
|
||||
* error handling (example: no planet section)
|
||||
"""
|
||||
|
||||
import sys
|
||||
from ConfigParser import ConfigParser
|
||||
|
||||
parser = ConfigParser()
|
||||
|
||||
planet_predefined_options = []
|
||||
|
||||
def __init__():
    """define the structure of an ini file"""
    # accessors are installed as attributes on the planet.config module
    # itself, so "config.name()" etc. work after this runs
    from planet import config

    def get(section, option, default):
        # per-section value, falling back to the [Planet] section, then
        # to the hard-coded default
        if section and parser.has_option(section, option):
            return parser.get(section, option)
        elif parser.has_option('Planet', option):
            return parser.get('Planet', option)
        else:
            return default

    def define_planet(name, default):
        # planet-wide option: accessor takes no arguments
        setattr(config, name, lambda default=default: get(None,name,default))
        planet_predefined_options.append(name)

    def define_tmpl(name, default):
        # per-template option: accessor takes the template section name
        setattr(config, name, lambda section, default=default:
            get(section,name,default))

    def define_tmpl_int(name, default):
        # per-template option coerced to int
        setattr(config, name, lambda section, default=default:
            int(get(section,name,default)))

    # planet wide options
    define_planet('name', "Unconfigured Planet")
    define_planet('link', "Unconfigured Planet")
    define_planet('cache_directory', "cache")
    define_planet('log_level', "WARNING")
    define_planet('feed_timeout', 20)

    # template options
    define_tmpl_int('days_per_page', 0)
    define_tmpl_int('items_per_page', 60)
    define_tmpl('encoding', 'utf-8')

    # prevent re-initialization
    setattr(config, '__init__', lambda: None)
|
||||
|
||||
def load(file):
    """ initialize and load a configuration"""
    # install the accessor functions (no-op after the first call)
    __init__()
    # replace, rather than merge into, any previously loaded configuration
    global parser
    parser = ConfigParser()
    parser.read(file)
|
||||
|
||||
def template_files():
    """ list the templates defined """
    # NOTE(review): split(' ') yields empty strings if the option contains
    # consecutive spaces — confirm whether split() was intended.
    return parser.get('Planet','template_files').split(' ')
|
||||
|
||||
def feeds():
    """ list the feeds defined """
    # every section is a feed, except [Planet] itself and the
    # per-template sections
    return [section for section in parser.sections()
            if section != 'Planet' and section not in template_files()]
|
||||
|
||||
def planet_options():
    """ dictionary of planet wide options"""
    # all options from the [Planet] section, as a plain dict
    return dict((opt, parser.get('Planet', opt))
                for opt in parser.options('Planet'))
|
||||
|
||||
def feed_options(section):
    """ dictionary of feed specific options"""
    # NOTE(review): this import appears unused in this function — confirm
    from planet import config
    # start from planet-wide options, minus the predefined accessors
    options = dict([(key,value) for key,value in planet_options().items()
        if key not in planet_predefined_options])
    # then overlay anything set in the feed's own section
    if parser.has_section(section):
        options.update(dict(map(lambda opt: (opt, parser.get(section,opt)),
            parser.options(section))))
    return options
|
||||
|
||||
def template_options(section):
    """ dictionary of template specific options"""
    # templates resolve options exactly like feeds do
    return feed_options(section)
|
||||
|
||||
def write(file=sys.stdout):
    """ write out an updated template """
    # ConfigParser.write() returns None; the previous
    # "print parser.write(file)" emitted a spurious "None" line
    # to stdout after the configuration.
    parser.write(file)
|
3656
planet/feedparser.py
Executable file
3656
planet/feedparser.py
Executable file
File diff suppressed because it is too large
Load Diff
195
planet/reconstitute.py
Normal file
195
planet/reconstitute.py
Normal file
@ -0,0 +1,195 @@
|
||||
"""
|
||||
Reconstitute an entry document from the output of the Universal Feed Parser.
|
||||
|
||||
The main entry point is called 'reconstitute'. Input parameters are:
|
||||
|
||||
results: this is the entire hash table return by the UFP
|
||||
entry: this is the entry in the hash that you want reconstituted
|
||||
|
||||
The value returned is an XML DOM. Every effort is made to convert
|
||||
everything to unicode, and text fields into either plain text or
|
||||
well formed XHTML.
|
||||
|
||||
Todo:
|
||||
* extension elements
|
||||
"""
|
||||
import re, time, md5, sgmllib
|
||||
from xml.sax.saxutils import escape
|
||||
from xml.dom import minidom
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from xml.parsers.expat import ExpatError
|
||||
|
||||
# C0 control characters that are not legal in XML 1.0 documents
# (everything below 0x20 except tab, newline and carriage return)
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
|
||||
|
||||
def createTextElement(parent, name, value):
    """Append a child element named *name* whose text content is *value*.

    A falsy value (None, empty string) produces no element at all.
    """
    if not value:
        return
    owner = parent.ownerDocument
    child = owner.createElement(name)
    child.appendChild(owner.createTextNode(value))
    parent.appendChild(child)
|
||||
|
||||
def invalidate(c):
    """Replace one invalid character (a regex match) with a visible marker.

    Produces an <acronym> element titled with the character's zero-padded
    code point, containing U+FFFD (the replacement character, as UTF-8).
    """
    codepoint = hex(ord(c.group(0)))[2:].rjust(4,'0')
    return '<acronym title="U+%s">\xef\xbf\xbd</acronym>' % codepoint
|
||||
|
||||
def ncr2c(value):
    """Convert one numeric character reference (a regex match) to a character.

    Handles both hexadecimal ("xHH") and decimal references.
    """
    ref = value.group(1)
    if ref.startswith('x'):
        return unichr(int(ref[1:], 16))
    return unichr(int(ref))
|
||||
|
||||
def normalize(text, bozo):
    """ convert everything to well formed XML """
    # *text* is a feedparser detail dict (value/type/...); mutated in place
    if text.has_key('type'):
        if text.type.lower().find('html')<0:
            # plain text: escape it and relabel as html
            text['value'] = escape(text.value)
            text['type'] = 'text/html'
        if text.type.lower() == 'text/html' or bozo:
            # tag soup (or a feed that wasn't well formed): let
            # BeautifulSoup re-balance the markup
            dom=BeautifulSoup(text.value,convertEntities="html")
            # scrub attribute values of numeric character references
            # and characters illegal in XML
            for tag in dom.findAll(True):
                for attr,value in tag.attrs:
                    value=sgmllib.charref.sub(ncr2c,value)
                    value=illegal_xml_chars.sub(u'\uFFFD',value)
                    tag[attr]=value
            # illegal characters in text content become visible markers
            text['value'] = illegal_xml_chars.sub(invalidate, str(dom))
    return text
|
||||
|
||||
def id(xentry, entry):
    """ copy or compute an id for the entry """
    # preference order: explicit id, then link, then an md5 digest of the
    # title / summary / content anchored at that element's base URI;
    # entries with none of these yield None (and no <id> element)

    if entry.has_key("id"):
        entry_id = entry.id
    elif entry.has_key("link"):
        entry_id = entry.link
    elif entry.has_key("title"):
        entry_id = (entry.title_detail.base + "/" +
            md5.new(entry.title).hexdigest())
    elif entry.has_key("summary"):
        entry_id = (entry.summary_detail.base + "/" +
            md5.new(entry.summary).hexdigest())
    elif entry.has_key("content"):
        entry_id = (entry.content[0].base + "/" +
            md5.new(entry.content[0].value).hexdigest())
    else:
        return

    # xentry may be None when the caller only wants the computed value
    if xentry: createTextElement(xentry, 'id', entry_id)
    return entry_id
|
||||
|
||||
def links(xentry, entry):
    """ copy links to the entry """
    if not entry.has_key('links'): return
    xdoc = xentry.ownerDocument
    # one <link type=... href=... rel=...> element per parsed link
    for link in entry.links:
        xlink = xdoc.createElement('link')
        xlink.setAttribute('type', link.type)
        xlink.setAttribute('href', link.href)
        xlink.setAttribute('rel', link.rel)
        xentry.appendChild(xlink)
|
||||
|
||||
def date(xentry, name, parsed):
    """Insert an RFC-3339-formatted date element into the entry.

    *parsed* is a time 9-tuple (as produced by the feed parser); a falsy
    value inserts nothing.
    """
    if not parsed:
        return
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsed)
    createTextElement(xentry, name, stamp)
|
||||
|
||||
def author(xentry, name, detail):
    """Insert an author-like (person construct) element into the entry.

    *detail* is a feedparser detail dict; name/email/uri children are
    added only for the keys actually present.
    """
    if not detail:
        return
    xauthor = xentry.ownerDocument.createElement(name)

    for child, key in (('name', 'name'), ('email', 'email'), ('uri', 'href')):
        createTextElement(xauthor, child, detail.get(key, None))

    xentry.appendChild(xauthor)
|
||||
|
||||
def content(xentry, name, detail, bozo):
    """ insert a content-like element into the entry """
    if not detail or not detail.value: return
    # clean up the markup first (mutates detail in place)
    normalize(detail, bozo)
    xdoc = xentry.ownerDocument
    xcontent = xdoc.createElement(name)

    try:
        # see if the resulting text is a well-formed XML fragment
        div = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
        if isinstance(detail.value,unicode):
            detail.value=detail.value.encode('utf-8')
        data = minidom.parseString(div % detail.value).documentElement

        if detail.value.find('<') < 0:
            # no markup at all: just the text node, dropping the div
            xcontent.appendChild(data.firstChild)
        else:
            # real markup: keep the wrapping div, typed as xhtml
            xcontent.setAttribute('type', 'xhtml')
            xcontent.appendChild(data)

    except ExpatError:
        # leave as html (escaped text content)
        xcontent.setAttribute('type', 'html')
        xcontent.appendChild(xdoc.createTextNode(detail.value.decode('utf-8')))

    # NOTE(review): attribute access — assumes detail always carries a
    # 'language' key (feedparser detail objects do); confirm.
    if detail.language:
        xcontent.setAttribute('xml:lang', detail.language)

    xentry.appendChild(xcontent)
|
||||
|
||||
def source(xentry, source, bozo):
    """ copy source information to the entry """
    xdoc = xentry.ownerDocument
    xsource = xdoc.createElement('source')

    createTextElement(xsource, 'id', source.get('id', None))
    createTextElement(xsource, 'icon', source.get('icon', None))
    createTextElement(xsource, 'logo', source.get('logo', None))

    author(xsource, 'author', source.get('author_detail',None))
    for contributor in source.get('contributors',[]):
        author(xsource, 'contributor', contributor)

    links(xsource, source)

    content(xsource, 'rights', source.get('rights_detail',None), bozo)
    content(xsource, 'subtitle', source.get('subtitle_detail',None), bozo)
    content(xsource, 'title', source.get('title_detail',None), bozo)

    date(xsource, 'updated', source.get('updated_parsed',None))

    # propagate planet inserted information (the planet:* keys that
    # spider.py copies from the configuration file)
    for key, value in source.items():
        if key.startswith('planet:'):
            createTextElement(xsource, key, value)

    xentry.appendChild(xsource)
|
||||
|
||||
def reconstitute(feed, entry):
    """ create an entry document from a parsed feed """
    # fresh Atom <entry> document; children filled in by the helpers above
    xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
    xentry=xdoc.documentElement
    xentry.setAttribute('xmlns:planet','http://planet.intertwingly.net/')

    id(xentry, entry)
    links(xentry, entry)

    # bozo flags a not-well-formed feed; text constructs then get the
    # full tag-soup cleanup in normalize()
    bozo = feed.bozo
    content(xentry, 'title', entry.get('title_detail',None), bozo)
    content(xentry, 'summary', entry.get('summary_detail',None), bozo)
    content(xentry, 'content', entry.get('content',[None])[0], bozo)
    content(xentry, 'rights', entry.get('rights_detail',None), bozo)

    # entries with no updated date get "now"
    date(xentry, 'updated', entry.get('updated_parsed',time.gmtime()))
    date(xentry, 'published', entry.get('published_parsed',None))

    author(xentry, 'author', entry.get('author_detail',None))
    for contributor in entry.get('contributors',[]):
        author(xentry, 'contributor', contributor)

    # fall back to the feed itself as the source element
    source(xentry, entry.get('source', feed.feed), bozo)

    return xdoc
|
86
planet/spider.py
Normal file
86
planet/spider.py
Normal file
@ -0,0 +1,86 @@
|
||||
"""
|
||||
Fetch either a single feed, or a set of feeds, normalize to Atom and XHTML,
|
||||
and write each as a set of entries in a cache directory.
|
||||
"""
|
||||
|
||||
from planet import config, feedparser, reconstitute
|
||||
import time, calendar, re, os
|
||||
|
||||
# xml.dom.ext (PyXML) is optional: use its PrettyPrint when available,
# otherwise spiderFeed() falls back to plain toxml() output
try:
    from xml.dom.ext import PrettyPrint
except:
    PrettyPrint = None
|
||||
|
||||
# Regular expressions to sanitise cache filenames
re_url_scheme    = re.compile(r'^[^:]*://')   # leading "scheme://"
re_slash         = re.compile(r'[?/]+')       # path / query separators
re_initial_cruft = re.compile(r'^[,.]*')      # leading dots and commas
re_final_cruft   = re.compile(r'[,.]*$')      # trailing dots and commas
|
||||
|
||||
def filename(directory, filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    # best effort: IDNA-encode the (URL) name so non-ASCII hosts get a
    # stable ASCII form; leave the name untouched if that fails
    try:
        if re_url_scheme.match(filename):
            if isinstance(filename,str):
                filename=filename.decode('utf-8').encode('idna')
            else:
                filename=filename.encode('idna')
    except UnicodeError:
        # narrowed from a bare except: only encode/decode failures are
        # expected here, and they are deliberately non-fatal
        pass
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)
    filename = re_initial_cruft.sub("", filename)
    filename = re_final_cruft.sub("", filename)

    return os.path.join(directory, filename)
|
||||
|
||||
def spiderFeed(feed):
    """ Spider (fetch) a single feed """
    data = feedparser.parse(feed)
    cache = config.cache_directory()

    # capture data from the planet configuration file
    for name, value in config.feed_options(feed).items():
        data.feed['planet:'+name] = value

    for entry in data.entries:
        # ensure every entry has an id; skip those for which none can
        # be synthesized
        if not entry.has_key('id'):
            entry['id'] = reconstitute.id(None, entry)
        if not entry['id']: continue

        out = filename(cache, entry.id)

        # last-modified time: the entry's own timestamp, else the
        # existing cache file's mtime, else "now"
        if entry.has_key('updated_parsed'):
            mtime = calendar.timegm(entry.updated_parsed)
        else:
            try:
                mtime = os.stat(out).st_mtime
            except OSError:
                mtime = time.time()
            entry['updated_parsed'] = time.gmtime(mtime)

        # reconstitute once and reuse the document (previously it was
        # recomputed for each of the write paths below)
        xml = reconstitute.reconstitute(data, entry)

        output = open(out,'w')
        try:
            if PrettyPrint:
                PrettyPrint(xml, output)
            else:
                output.write(xml.toxml('utf-8'))
        finally:
            output.close()

        # stamp the cache file with the entry's timestamp
        os.utime(out, (mtime, mtime))
|
||||
|
||||
def spiderPlanet(configFile):
    """ Spider (fetch) an entire planet """
    import planet
    config.load(configFile)
    log = planet.getLogger(config.log_level())
    # apply the configured per-feed network timeout globally
    planet.setTimeout(config.feed_timeout())

    for feed in config.feeds():
        log.info("Updating feed %s", feed)
        spiderFeed(feed)
|
46
planet/splice.py
Normal file
46
planet/splice.py
Normal file
@ -0,0 +1,46 @@
|
||||
""" Splice together a planet from a cache of feed entries """
|
||||
import glob, os
|
||||
from planet import config
|
||||
from xml.dom import minidom
|
||||
from reconstitute import createTextElement
|
||||
|
||||
def splice(configFile):
    """ Splice together a planet from a cache of entries """
    import planet
    config.load(configFile)
    log = planet.getLogger(config.log_level())

    # cache files, newest first (sorted by mtime)
    cache = config.cache_directory()
    dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")]
    dir.sort()
    dir.reverse()

    # take enough entries to satisfy the largest of the templates
    items=max([config.items_per_page(templ)
        for templ in config.template_files()])

    doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
    feed = doc.documentElement

    # insert feed information
    createTextElement(feed, 'title', config.name())

    # insert entry information
    for mtime,file in dir[:items]:
        entry=minidom.parse(file)
        # NOTE(review): the entry element belongs to a different minidom
        # document; minidom tolerates this, but importNode would be the
        # strictly correct approach — confirm.
        feed.appendChild(entry.documentElement)

    # insert subscription information
    feed.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
    for sub in config.feeds():
        name = config.feed_options(sub).get('name','')
        xsub = doc.createElement('planet:subscription')
        xlink = doc.createElement('link')
        xlink.setAttribute('rel','self')
        xlink.setAttribute('href',sub.decode('utf-8'))
        xsub.appendChild(xlink)
        xname = doc.createElement('planet:name')
        xname.appendChild(doc.createTextNode(name.decode('utf-8')))
        xsub.appendChild(xname)
        feed.appendChild(xsub)

    return doc
|
424
planet/timeoutsocket.py
Normal file
424
planet/timeoutsocket.py
Normal file
@ -0,0 +1,424 @@
|
||||
|
||||
####
|
||||
# Copyright 2000,2001 by Timothy O'Malley <timo@alum.mit.edu>
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software
|
||||
# and its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Timothy O'Malley not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
|
||||
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
# PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
####
|
||||
|
||||
"""Timeout Socket
|
||||
|
||||
This module enables a timeout mechanism on all TCP connections. It
|
||||
does this by inserting a shim into the socket module. After this module
|
||||
has been imported, all socket creation goes through this shim. As a
|
||||
result, every TCP connection will support a timeout.
|
||||
|
||||
The beauty of this method is that it immediately and transparently
|
||||
enables the entire python library to support timeouts on TCP sockets.
|
||||
As an example, if you wanted to SMTP connections to have a 20 second
|
||||
timeout:
|
||||
|
||||
import timeoutsocket
|
||||
import smtplib
|
||||
timeoutsocket.setDefaultSocketTimeout(20)
|
||||
|
||||
|
||||
The timeout applies to the socket functions that normally block on
|
||||
execution: read, write, connect, and accept. If any of these
|
||||
operations exceeds the specified timeout, the exception Timeout
|
||||
will be raised.
|
||||
|
||||
The default timeout value is set to None. As a result, importing
|
||||
this module does not change the default behavior of a socket. The
|
||||
timeout mechanism only activates when the timeout has been set to
|
||||
a numeric value. (This behavior mimics the behavior of the
|
||||
select.select() function.)
|
||||
|
||||
This module implements two classes: TimeoutSocket and TimeoutFile.
|
||||
|
||||
The TimeoutSocket class defines a socket-like object that attempts to
|
||||
avoid the condition where a socket may block indefinitely. The
|
||||
TimeoutSocket class raises a Timeout exception whenever the
|
||||
current operation delays too long.
|
||||
|
||||
The TimeoutFile class defines a file-like object that uses the TimeoutSocket
|
||||
class. When the makefile() method of TimeoutSocket is called, it returns
|
||||
an instance of a TimeoutFile.
|
||||
|
||||
Each of these objects adds two methods to manage the timeout value:
|
||||
|
||||
get_timeout() --> returns the timeout of the socket or file
|
||||
set_timeout() --> sets the timeout of the socket or file
|
||||
|
||||
|
||||
As an example, one might use the timeout feature to create httplib
|
||||
connections that will timeout after 30 seconds:
|
||||
|
||||
import timeoutsocket
|
||||
import httplib
|
||||
H = httplib.HTTP("www.python.org")
|
||||
H.sock.set_timeout(30)
|
||||
|
||||
Note: When used in this manner, the connect() routine may still
|
||||
block because it happens before the timeout is set. To avoid
|
||||
this, use the 'timeoutsocket.setDefaultSocketTimeout()' function.
|
||||
|
||||
Good Luck!
|
||||
|
||||
"""
|
||||
|
||||
__version__ = "$Revision: 1.1.1.1 $"
__author__ = "Timothy O'Malley <timo@alum.mit.edu>"

#
# Imports
#
import select, string
import socket
# Grab a handle on the *real* socket class.  If this module has already
# run once and monkey-patched the socket module (see the bottom of this
# file), the original class was stashed in socket._no_timeoutsocket;
# otherwise socket.socket still is the original.  This makes re-imports
# idempotent instead of wrapping the wrapper.
if not hasattr(socket, "_no_timeoutsocket"):
    _socket = socket.socket
else:
    _socket = socket._no_timeoutsocket
|
||||
|
||||
|
||||
#
# Set up constants to test for Connected and Blocking operations.
# We delete 'os' and 'errno' to keep our namespace clean(er).
# Thanks to Alex Martelli and G. Li for the Windows error codes.
#
import os
if os.name == "nt":
    # Windows uses raw Winsock error numbers.
    # NOTE(review): 10035 is WSAEWOULDBLOCK; 10022/10056 appear to be
    # WSAEINVAL/WSAEISCONN (reported on a second connect attempt) — confirm.
    _IsConnected = ( 10022, 10056 )
    _ConnectBusy = ( 10035, )
    _AcceptBusy = ( 10035, )
else:
    # POSIX platforms use symbolic errno values.
    import errno
    _IsConnected = ( errno.EISCONN, )
    _ConnectBusy = ( errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK )
    _AcceptBusy = ( errno.EAGAIN, errno.EWOULDBLOCK )
    del errno
del os
|
||||
|
||||
|
||||
#
# Module-wide default timeout applied to every newly created
# TimeoutSocket.  None (the initial value) means "never time out",
# preserving stock socket behavior until a caller opts in.
#
_DefaultTimeout = None

def setDefaultSocketTimeout(timeout):
    """Set the default timeout (in seconds) for all new TimeoutSockets.

    Pass None to disable timeouts for subsequently created sockets.
    """
    global _DefaultTimeout
    _DefaultTimeout = timeout

def getDefaultSocketTimeout():
    """Return the current module-wide default timeout (None = no timeout)."""
    return _DefaultTimeout
|
||||
|
||||
#
# Exceptions for socket errors and timeouts
#
# Error is an alias for socket.error so callers can catch plain socket
# failures raised through this module under one familiar name.
Error = socket.error
class Timeout(Exception):
    """Raised when a read, write, connect, or accept exceeds the timeout."""
    pass
|
||||
|
||||
|
||||
#
# Factory function
#
from socket import AF_INET, SOCK_STREAM
def timeoutsocket(family=AF_INET, type=SOCK_STREAM, proto=None):
    """Drop-in replacement for socket.socket().

    Only AF_INET/SOCK_STREAM (TCP) sockets are wrapped in a
    TimeoutSocket carrying the module-wide default timeout; any other
    family/type is returned as a plain socket, since the select()-based
    timeout logic here only applies to stream connections.
    """
    if family != AF_INET or type != SOCK_STREAM:
        if proto:
            return _socket(family, type, proto)
        else:
            return _socket(family, type)
    return TimeoutSocket( _socket(family, type), _DefaultTimeout )
# end timeoutsocket
|
||||
|
||||
#
# The TimeoutSocket class definition
#
class TimeoutSocket:
    """TimeoutSocket object

    Implements a socket-like object that raises Timeout whenever
    an operation takes too long.
    The definition of 'too long' can be changed using the
    set_timeout() method.
    """

    # Number of outstanding file-like copies handed out by makefile();
    # close() only really closes the underlying socket once this is zero.
    _copies = 0
    # Blocking flag (1 = blocking).  Kept in sync by setblocking() and
    # consulted by the timed operations below.
    _blocking = 1

    def __init__(self, sock, timeout):
        # sock: the real socket being wrapped.
        # timeout: seconds to wait before raising Timeout (None = never).
        self._sock = sock
        self._timeout = timeout
    # end __init__

    def __getattr__(self, key):
        # Delegate every attribute we don't define to the real socket.
        return getattr(self._sock, key)
    # end __getattr__

    def get_timeout(self):
        """Return the current timeout in seconds (None means no timeout)."""
        return self._timeout
    # end get_timeout

    def set_timeout(self, timeout=None):
        """Set the timeout in seconds; None disables the timeout."""
        self._timeout = timeout
    # end set_timeout

    def setblocking(self, blocking):
        # Record the requested mode so timed operations can restore it
        # after their temporary non-blocking phase, then apply it.
        self._blocking = blocking
        return self._sock.setblocking(blocking)
    # end setblocking

    def connect_ex(self, addr):
        """connect() variant returning an errno instead of raising Error.

        A Timeout, however, still propagates to the caller.
        """
        errcode = 0
        try:
            self.connect(addr)
        except Error, why:
            errcode = why[0]
        return errcode
    # end connect_ex

    def connect(self, addr, port=None, dumbhack=None):
        """Connect to addr, raising Timeout if it takes longer than allowed.

        'dumbhack' is internal: it flags the second pass made after
        select() reports the socket writable, where an "already
        connected" error actually means success.
        """
        # In case we were called as connect(host, port)
        if port != None: addr = (addr, port)

        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to connect
        try:
            sock.setblocking(0)
            sock.connect(addr)
            sock.setblocking(blocking)
            return
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not blocking, re-raise
            if not blocking:
                raise

            # If we are already connected, then return success.
            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if dumbhack and errcode in _IsConnected:
                return
            elif errcode not in _ConnectBusy:
                raise

        # Now, wait for the connect to happen
        # ONLY if dumbhack indicates this is pass number one.
        # If select raises an error, we pass it on.
        # Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([], [sock], [], timeout)
            if w:
                return self.connect(addr, dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted connect to %s timed out." % str(addr) )
    # end connect

    def accept(self, dumbhack=None):
        """Accept a connection, raising Timeout if none arrives in time.

        Returns (TimeoutSocket, addr); the accepted socket inherits our
        timeout and blocking mode.  'dumbhack' flags the internal second
        pass made after select() reports the socket readable.
        """
        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to accept
        # If we get a valid result, then convert the
        # accept'ed socket into a TimeoutSocket.
        # Be careful about the blocking mode of ourselves.
        try:
            sock.setblocking(0)
            newsock, addr = sock.accept()
            sock.setblocking(blocking)
            timeoutnewsock = self.__class__(newsock, timeout)
            timeoutnewsock.setblocking(blocking)
            return (timeoutnewsock, addr)
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not supposed to block, then re-raise
            if not blocking:
                raise

            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if errcode not in _AcceptBusy:
                raise

        # Now, wait for the accept to happen
        # ONLY if dumbhack indicates this is pass number one.
        # If select raises an error, we pass it on.
        # Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([sock], [], [], timeout)
            if r:
                return self.accept(dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted accept timed out.")
    # end accept

    def send(self, data, flags=0):
        """Send data, raising Timeout if the socket stays unwritable."""
        sock = self._sock
        if self._blocking:
            # Wait (at most self._timeout seconds) for writability.
            r,w,e = select.select([],[sock],[], self._timeout)
            if not w:
                raise Timeout("Send timed out")
        return sock.send(data, flags)
    # end send

    def recv(self, bufsize, flags=0):
        """Receive up to bufsize bytes, raising Timeout if none arrive."""
        sock = self._sock
        if self._blocking:
            # Wait (at most self._timeout seconds) for readability.
            r,w,e = select.select([sock], [], [], self._timeout)
            if not r:
                raise Timeout("Recv timed out")
        return sock.recv(bufsize, flags)
    # end recv

    def makefile(self, flags="r", bufsize=-1):
        """Return a TimeoutFile wrapping this socket.

        Each copy bumps _copies so close() can reference-count the
        underlying socket.
        """
        self._copies = self._copies +1
        return TimeoutFile(self, flags, bufsize)
    # end makefile

    def close(self):
        # Only close the real socket when no makefile() copies remain.
        if self._copies <= 0:
            self._sock.close()
        else:
            self._copies = self._copies -1
    # end close

# end TimeoutSocket
|
||||
|
||||
|
||||
class TimeoutFile:
    """TimeoutFile object

    Implements a file-like object on top of TimeoutSocket.
    Read-ahead data is buffered on the *socket* (sock._inqueue) so that
    multiple TimeoutFile copies of one socket share a single buffer.
    """

    def __init__(self, sock, mode="r", bufsize=4096):
        # mode is accepted for file-object compatibility but not used here.
        self._sock = sock
        # Default chunk size; a non-positive bufsize keeps the default.
        self._bufsize = 4096
        if bufsize > 0: self._bufsize = bufsize
        if not hasattr(sock, "_inqueue"): self._sock._inqueue = ""

    # end __init__

    def __getattr__(self, key):
        # Delegate everything else (send, recv, ...) to the socket.
        return getattr(self._sock, key)
    # end __getattr__

    def close(self):
        # TimeoutSocket.close() reference-counts makefile() copies.
        self._sock.close()
        self._sock = None
    # end close

    def write(self, data):
        self.send(data)
    # end write

    def read(self, size=-1):
        """Read 'size' bytes, or until EOF when size is negative."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            # Stop once the buffer satisfies the request (size >= 0).
            datalen = len(_sock._inqueue)
            if datalen >= size >= 0:
                break
            # Never ask for more than is still needed.
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                break
            _sock._inqueue = _sock._inqueue + buf
        data = _sock._inqueue
        _sock._inqueue = ""
        # NOTE(review): datalen is the buffer length *before* the final
        # recv; the slice below trims any overshoot back into the queue.
        if size > 0 and datalen > size:
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end read

    def readline(self, size=-1):
        """Read one line (newline included), or at most 'size' bytes."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            # Stop as soon as the buffer holds a newline ...
            idx = string.find(_sock._inqueue, "\n")
            if idx >= 0:
                break
            # ... or already satisfies the byte limit.
            datalen = len(_sock._inqueue)
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                break
            _sock._inqueue = _sock._inqueue + buf

        data = _sock._inqueue
        _sock._inqueue = ""
        if idx >= 0:
            # Split just after the newline; keep the remainder buffered.
            idx = idx + 1
            _sock._inqueue = data[idx:]
            data = data[:idx]
        elif size > 0 and datalen > size:
            # No newline: enforce the byte limit, re-buffering the rest.
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end readline

    def readlines(self, sizehint=-1):
        """Read to EOF and return the data split into lines.

        sizehint is accepted for file-object compatibility but ignored.
        """
        result = []
        data = self.read()
        while data:
            idx = string.find(data, "\n")
            if idx >= 0:
                idx = idx + 1
                result.append( data[:idx] )
                data = data[idx:]
            else:
                # Trailing fragment with no newline.
                result.append( data )
                data = ""
        return result
    # end readlines

    def flush(self):  pass

# end TimeoutFile
|
||||
|
||||
|
||||
#
# Silently replace the socket() builtin function with
# our timeoutsocket() definition.
#
# socket._no_timeoutsocket stashes the original class and doubles as a
# guard so a re-import of this module does not patch twice.
if not hasattr(socket, "_no_timeoutsocket"):
    socket._no_timeoutsocket = socket.socket
    socket.socket = timeoutsocket
# Rebind this module's own 'socket' name to the factory as well, so
# timeoutsocket.socket(...) also produces TimeoutSockets.
del socket
socket = timeoutsocket
# Finis
|
11
runtests.py
Executable file
11
runtests.py
Executable file
@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env python
"""Discover and run every planet test module under tests/."""
import glob, trace, unittest

# find all of the planet test modules
# (trace.fullmodname maps 'tests/test_x.py' to the importable dotted
# module name 'tests.test_x')
modules = map(trace.fullmodname, glob.glob('tests/test_*.py'))

# load all of the tests into a suite
suite = unittest.TestLoader().loadTestsFromNames(modules)

# run test suite
unittest.TextTestRunner().run(suite)
|
20
spider.py
Normal file
20
spider.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""
|
||||
Main program to run just the spider portion of planet
|
||||
"""
|
||||
|
||||
import sys
|
||||
from planet import spider, config
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if len(sys.argv) == 2:
|
||||
# spider all feeds
|
||||
spider.spiderPlanet(sys.argv[1])
|
||||
elif len(sys.argv) > 2 and os.path.isdir(sys.argv[1]):
|
||||
# spider selected feeds
|
||||
config.load(sys.argv[1])
|
||||
for feed in sys.argv[2:]:
|
||||
spider.spiderFeed(feed)
|
||||
else:
|
||||
print "Usage:"
|
||||
print " python %s config.ini [URI URI ...]" % sys.argv[0]
|
21
splice.py
Normal file
21
splice.py
Normal file
@ -0,0 +1,21 @@
|
||||
"""
|
||||
Main program to run just the splice portion of planet
|
||||
"""
|
||||
|
||||
import os.path
|
||||
import sys
|
||||
from planet import splice
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
|
||||
# at the moment, we don't have template support, so we cheat and
|
||||
# simply insert a XSLT processing instruction
|
||||
doc = splice.splice(sys.argv[1])
|
||||
pi = doc.createProcessingInstruction(
|
||||
'xml-stylesheet','type="text/xsl" href="planet.xslt"')
|
||||
doc.insertBefore(pi, doc.firstChild)
|
||||
print doc.toxml('utf-8')
|
||||
else:
|
||||
print "Usage:"
|
||||
print " python %s config.ini" % sys.argv[0]
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
13
tests/data/config/basic.ini
Normal file
13
tests/data/config/basic.ini
Normal file
@ -0,0 +1,13 @@
|
||||
[Planet]
|
||||
name = Test Configuration
|
||||
template_files = index.html.tmpl atom.xml.tmpl
|
||||
items_per_page = 50
|
||||
|
||||
[index.html.tmpl]
|
||||
days_per_page = 7
|
||||
|
||||
[feed1]
|
||||
name = one
|
||||
|
||||
[feed2]
|
||||
name = two
|
13
tests/data/reconstitute/author_email.xml
Normal file
13
tests/data/reconstitute/author_email.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: author name
|
||||
Expect: author_detail.email == 'john@example.com'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<author>
|
||||
<email>john@example.com</email>
|
||||
</author>
|
||||
</entry>
|
||||
</feed>
|
||||
|
13
tests/data/reconstitute/author_name.xml
Normal file
13
tests/data/reconstitute/author_name.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: author name
|
||||
Expect: author_detail.name == 'John Doe'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
</entry>
|
||||
</feed>
|
||||
|
13
tests/data/reconstitute/author_uri.xml
Normal file
13
tests/data/reconstitute/author_uri.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: author name
|
||||
Expect: author_detail.href == 'http://example.com/~john/'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<author>
|
||||
<uri>http://example.com/~john/</uri>
|
||||
</author>
|
||||
</entry>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/content_html.xml
Normal file
10
tests/data/reconstitute/content_html.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: entity encoded html content
|
||||
Expect: content[0].value == u'D\xe9tente' and content[0].type=='text/plain'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<content type="html">D&eacute;tente</content>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/content_illegal_char.xml
Normal file
10
tests/data/reconstitute/content_illegal_char.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: illegal control character
|
||||
Expect: content[0].value == u'Page 1<acronym title="U+000c">\ufffd</acronym>Page 2'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<content type="html">Page 1Page 2</content>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/content_lang.xml
Normal file
10
tests/data/reconstitute/content_lang.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: content value
|
||||
Expect: content[0].language == 'en-us'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<content xml:lang="en-us">foo</content>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/content_tag_soup.xml
Normal file
10
tests/data/reconstitute/content_tag_soup.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: improperly nested tags
|
||||
Expect: content[0].value == 'This is <b><i>very</i></b> confused'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<content type="html">This is <B><i;>very</b></I> confused</content>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/content_text.xml
Normal file
10
tests/data/reconstitute/content_text.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: plain text content
|
||||
Expect: content[0].value == 'AT&T'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<content type="text">AT&T</content>
|
||||
</entry>
|
||||
</feed>
|
13
tests/data/reconstitute/content_xhtml.xml
Normal file
13
tests/data/reconstitute/content_xhtml.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: xhtml content
|
||||
Expect: content[0].value == 'A <b>very</b> bad day'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<content type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
|
||||
</content>
|
||||
</entry>
|
||||
</feed>
|
||||
|
13
tests/data/reconstitute/contributor_email.xml
Normal file
13
tests/data/reconstitute/contributor_email.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: contributor name
|
||||
Expect: contributors[0].email == 'john@example.com'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<contributor>
|
||||
<email>john@example.com</email>
|
||||
</contributor>
|
||||
</entry>
|
||||
</feed>
|
||||
|
13
tests/data/reconstitute/contributor_name.xml
Normal file
13
tests/data/reconstitute/contributor_name.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: contributor name
|
||||
Expect: contributors[0].name == 'John Doe'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<contributor>
|
||||
<name>John Doe</name>
|
||||
</contributor>
|
||||
</entry>
|
||||
</feed>
|
||||
|
13
tests/data/reconstitute/contributor_uri.xml
Normal file
13
tests/data/reconstitute/contributor_uri.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: contributor name
|
||||
Expect: contributors[0].href == 'http://example.com/~john/'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<contributor>
|
||||
<uri>http://example.com/~john/</uri>
|
||||
</contributor>
|
||||
</entry>
|
||||
</feed>
|
||||
|
11
tests/data/reconstitute/id.xml
Normal file
11
tests/data/reconstitute/id.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<!--
|
||||
Description: id
|
||||
Expect: id == 'http://example.com/1'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<id>http://example.com/1</id>
|
||||
</entry>
|
||||
</feed>
|
||||
|
13
tests/data/reconstitute/id_only_content.xml
Normal file
13
tests/data/reconstitute/id_only_content.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: id generated from content
|
||||
Expect: id == 'http://example.com//9a0364b9e99bb480dd25e1f0284c8555'
|
||||
-->
|
||||
|
||||
<rss xml:base="http://example.com/">
|
||||
<channel>
|
||||
<item xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
||||
<content:encoded>content</content>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
13
tests/data/reconstitute/id_only_description.xml
Normal file
13
tests/data/reconstitute/id_only_description.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: id generated from description
|
||||
Expect: id == 'http://example.com//67daf92c833c41c95db874e18fcb2786'
|
||||
-->
|
||||
|
||||
<rss xml:base="http://example.com/">
|
||||
<channel>
|
||||
<item>
|
||||
<description>description</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
13
tests/data/reconstitute/id_only_link.xml
Normal file
13
tests/data/reconstitute/id_only_link.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: id generated from link
|
||||
Expect: id == 'http://example.com/1'
|
||||
-->
|
||||
|
||||
<rss>
|
||||
<channel>
|
||||
<item>
|
||||
<link>http://example.com/1</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
13
tests/data/reconstitute/id_only_title.xml
Normal file
13
tests/data/reconstitute/id_only_title.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: id generated from title
|
||||
Expect: id == 'http://example.com//d5d3db1765287eef77d7927cc956f50a'
|
||||
-->
|
||||
|
||||
<rss xml:base="http://example.com/">
|
||||
<channel>
|
||||
<item>
|
||||
<title>title</title>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
11
tests/data/reconstitute/link_href.xml
Normal file
11
tests/data/reconstitute/link_href.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<!--
|
||||
Description: link relationship
|
||||
Expect: links[0].href == 'http://example.com/1'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<link href="http://example.com/1"/>
|
||||
</entry>
|
||||
</feed>
|
||||
|
11
tests/data/reconstitute/link_rel.xml
Normal file
11
tests/data/reconstitute/link_rel.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<!--
|
||||
Description: link relationship
|
||||
Expect: links[0].rel == 'alternate'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<link href="http://example.com/1"/>
|
||||
</entry>
|
||||
</feed>
|
||||
|
11
tests/data/reconstitute/link_type.xml
Normal file
11
tests/data/reconstitute/link_type.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<!--
|
||||
Description: link relationship
|
||||
Expect: links[0].type == 'text/html'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<link href="http://example.com/1"/>
|
||||
</entry>
|
||||
</feed>
|
||||
|
11
tests/data/reconstitute/published.xml
Normal file
11
tests/data/reconstitute/published.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<!--
|
||||
Description: published, rollover past midnight on feb 28 in leap year
|
||||
Expect: published_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<published>2004-02-28T18:14:55-08:00</published>
|
||||
</entry>
|
||||
</feed>
|
||||
|
11
tests/data/reconstitute/rights.xml
Normal file
11
tests/data/reconstitute/rights.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<!--
|
||||
Description: rights
|
||||
Expect: rights == u'\xa9 2006'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<rights type="html">&copy; 2006</rights>
|
||||
</entry>
|
||||
</feed>
|
||||
|
12
tests/data/reconstitute/source_author.xml
Normal file
12
tests/data/reconstitute/source_author.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<!--
|
||||
Description: source author
|
||||
Expect: source.author_detail.name == 'John Doe'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
12
tests/data/reconstitute/source_contributor.xml
Normal file
12
tests/data/reconstitute/source_contributor.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<!--
|
||||
Description: source contributor
|
||||
Expect: source.contributors[0].name == 'John Doe'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<contributor>
|
||||
<name>John Doe</name>
|
||||
</contributor>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_icon.xml
Normal file
10
tests/data/reconstitute/source_icon.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source icon
|
||||
Expect: source.icon == 'http://www.example.com/favicon.ico'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<icon>http://www.example.com/favicon.ico</icon>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_id.xml
Normal file
10
tests/data/reconstitute/source_id.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source id
|
||||
Expect: source.id == 'http://example.com/'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>http://example.com/</id>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_link.xml
Normal file
10
tests/data/reconstitute/source_link.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source link
|
||||
Expect: source.links[0].href == 'http://example.com/atom.xml'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel='self' href='http://example.com/atom.xml'/>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_logo.xml
Normal file
10
tests/data/reconstitute/source_logo.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source logo
|
||||
Expect: source.logo == 'http://www.example.com/logo.jpg'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<logo>http://www.example.com/logo.jpg</logo>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_rights.xml
Normal file
10
tests/data/reconstitute/source_rights.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source rights
|
||||
Expect: source.rights == u'\xa9 2006'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<rights type="html">&copy; 2006</rights>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_subtitle.xml
Normal file
10
tests/data/reconstitute/source_subtitle.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source subtitle
|
||||
Expect: source.subtitle == 'snarky phrase'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<subtitle>snarky phrase</subtitle>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_title.xml
Normal file
10
tests/data/reconstitute/source_title.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source title
|
||||
Expect: source.title == 'visible name'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>visible name</title>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/source_updated.xml
Normal file
10
tests/data/reconstitute/source_updated.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: source updated, rollover past midnight on feb 28 in leap year
|
||||
Expect: source.updated_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<updated>2004-02-28T18:14:55-08:00</updated>
|
||||
<entry/>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/summary_html.xml
Normal file
10
tests/data/reconstitute/summary_html.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: entity encoded html summary
|
||||
Expect: summary_detail.value == u'D\xe9tente' and summary_detail.type=='text/plain'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<summary type="html">D&eacute;tente</summary>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/summary_lang.xml
Normal file
10
tests/data/reconstitute/summary_lang.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: summary value
|
||||
Expect: summary_detail.language == 'en-us'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<summary xml:lang="en-us">foo</summary>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/summary_text.xml
Normal file
10
tests/data/reconstitute/summary_text.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: plain text summary
|
||||
Expect: summary_detail.value == 'AT&T'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<summary type="text">AT&T</summary>
|
||||
</entry>
|
||||
</feed>
|
13
tests/data/reconstitute/summary_xhtml.xml
Normal file
13
tests/data/reconstitute/summary_xhtml.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: xhtml summary
|
||||
Expect: summary_detail.value == 'A <b>very</b> bad day'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<summary type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
|
||||
</summary>
|
||||
</entry>
|
||||
</feed>
|
||||
|
10
tests/data/reconstitute/title_html.xml
Normal file
10
tests/data/reconstitute/title_html.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: entity encoded html title
|
||||
Expect: title_detail.value == u'D\xe9tente' and title_detail.type=='text/plain'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<title type="html">D&eacute;tente</title>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/title_lang.xml
Normal file
10
tests/data/reconstitute/title_lang.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: title value
|
||||
Expect: title_detail.language == 'en-us'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<title xml:lang="en-us">foo</title>
|
||||
</entry>
|
||||
</feed>
|
10
tests/data/reconstitute/title_text.xml
Normal file
10
tests/data/reconstitute/title_text.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<!--
|
||||
Description: plain text title
|
||||
Expect: title_detail.value == 'AT&T'
|
||||
-->
|
||||
|
||||
<feed xmns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<title type="text">AT&T</title>
|
||||
</entry>
|
||||
</feed>
|
13
tests/data/reconstitute/title_xhtml.xml
Normal file
13
tests/data/reconstitute/title_xhtml.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<!--
|
||||
Description: xhtml title
|
||||
Expect: title_detail.value == 'A <b>very</b> bad day'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<title type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">A <b>very</b> bad day</div>
|
||||
</title>
|
||||
</entry>
|
||||
</feed>
|
||||
|
11
tests/data/reconstitute/updated.xml
Normal file
11
tests/data/reconstitute/updated.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<!--
|
||||
Description: updated, rollover past midnight on feb 28 in leap year
|
||||
Expect: updated_parsed == (2004, 2, 29, 2, 14, 55, 6, 60, 0)
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry>
|
||||
<updated>2004-02-28T18:14:55-08:00</updated>
|
||||
</entry>
|
||||
</feed>
|
||||
|
12
tests/data/spider/config.ini
Normal file
12
tests/data/spider/config.ini
Normal file
@ -0,0 +1,12 @@
|
||||
[Planet]
|
||||
cache_directory = tests/work/spider/cache
|
||||
template_files =
|
||||
|
||||
[tests/data/spider/testfeed1b.atom]
|
||||
name = one
|
||||
|
||||
[tests/data/spider/testfeed2.atom]
|
||||
name = two
|
||||
|
||||
[tests/data/spider/testfeed3.rss]
|
||||
name = three
|
49
tests/data/spider/testfeed1a.atom
Normal file
49
tests/data/spider/testfeed1a.atom
Normal file
@ -0,0 +1,49 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom"/>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
|
||||
|
||||
<title>Sam Ruby</title>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<updated>2006-06-16T20:15:18-04:00</updated>
|
||||
<link href="http://www.intertwingly.net/blog/"/>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
|
||||
<link href="http://example.com/1"/>
|
||||
<title>Mercury</title>
|
||||
<content>Messenger of the Roman Gods</content>
|
||||
<updated>2006-01-01T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
|
||||
<link href="http://example.com/2"/>
|
||||
<title>Venus</title>
|
||||
<content>the Morning Star</content>
|
||||
<updated>2006-01-02T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
|
||||
<link href="http://example.com/3"/>
|
||||
<title>Earth</title>
|
||||
<content>the Blue Planet</content>
|
||||
<updated>2006-01-03T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
|
||||
<link href="http://example.com/4"/>
|
||||
<title>Mars</title>
|
||||
<content>the Red Planet</content>
|
||||
<updated>2006-01-04T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
</feed>
|
||||
|
50
tests/data/spider/testfeed1b.atom
Normal file
50
tests/data/spider/testfeed1b.atom
Normal file
@ -0,0 +1,50 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom"/>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
|
||||
|
||||
<title>Sam Ruby</title>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<updated>2006-06-16T20:15:18-04:00</updated>
|
||||
<link href="http://www.intertwingly.net/blog/"/>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
|
||||
<link href="http://example.com/1"/>
|
||||
<title>Mercury</title>
|
||||
<content>Messenger of the Roman Gods</content>
|
||||
<updated>2006-01-01T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
|
||||
<link href="http://example.com/2"/>
|
||||
<title>Venus</title>
|
||||
<content>the Jewel of the Sky</content>
|
||||
<published>2006-01-02T00:00:00Z</published>
|
||||
<updated>2006-02-02T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
|
||||
<link href="http://example.com/3"/>
|
||||
<title>Earth</title>
|
||||
<content>the Blue Planet</content>
|
||||
<updated>2006-01-03T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
|
||||
<link href="http://example.com/4"/>
|
||||
<title>Mars</title>
|
||||
<content>the Red Planet</content>
|
||||
<updated>2006-01-04T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
</feed>
|
||||
|
49
tests/data/spider/testfeed2.atom
Normal file
49
tests/data/spider/testfeed2.atom
Normal file
@ -0,0 +1,49 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="self" href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom"/>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
|
||||
|
||||
<title>Sam Ruby</title>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<updated>2006-06-16T20:15:18-04:00</updated>
|
||||
<link href="http://www.intertwingly.net/blog/"/>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/1</id>
|
||||
<link href="http://example.com/1"/>
|
||||
<title>Mercury</title>
|
||||
<content>Messenger of the Roman Gods</content>
|
||||
<updated>2006-01-01T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/2</id>
|
||||
<link href="http://example.com/2"/>
|
||||
<title>Venus</title>
|
||||
<content>the Morning Star</content>
|
||||
<updated>2006-01-02T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/3</id>
|
||||
<link href="http://example.com/3"/>
|
||||
<title>Earth</title>
|
||||
<content>the Blue Planet</content>
|
||||
<updated>2006-01-03T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
<entry>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/4</id>
|
||||
<link href="http://example.com/4"/>
|
||||
<title>Mars</title>
|
||||
<content>the Red Planet</content>
|
||||
<updated>2006-01-04T00:00:00Z</updated>
|
||||
</entry>
|
||||
|
||||
</feed>
|
||||
|
37
tests/data/spider/testfeed3.rss
Normal file
37
tests/data/spider/testfeed3.rss
Normal file
@ -0,0 +1,37 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Sam Ruby</title>
|
||||
<link>http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss</link>
|
||||
<description>It’s just data</description>
|
||||
|
||||
<item>
|
||||
<guid>tag:planet.intertwingly.net,2006:testfeed3/1</guid>
|
||||
<link href="http://example.com/1"/>
|
||||
<title>Mercury</title>
|
||||
<description>Messenger of the Roman Gods</description>
|
||||
<pubDate>Sun, 01 Jan 2006 00:00:00 +0000</pubDate>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<guid>tag:planet.intertwingly.net,2006:testfeed3/2</guid>
|
||||
<link>http://example.com/2</link>
|
||||
<title>Venus</title>
|
||||
<description>the Morning Star</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<link>http://example.com/3</link>
|
||||
<title>Earth</title>
|
||||
<description>the Blue Planet</description>
|
||||
<pubDate>Tue, 03 Jan 2006 00:00:00 +0000</pubDate>
|
||||
</item>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.com/4"/>
|
||||
<title>Mars</title>
|
||||
<description>the Red Planet</description>
|
||||
</entry>
|
||||
|
||||
</feed>
|
||||
|
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,1
vendored
Normal file
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,1
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
|
||||
<link href='http://example.com/1' type='text/html' rel='alternate'/>
|
||||
<title>Mercury</title>
|
||||
<content>Messenger of the Roman Gods</content>
|
||||
<updated>2006-01-01T00:00:00Z</updated>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>one</planet:name>
|
||||
</source>
|
||||
</entry>
|
23
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,2
vendored
Normal file
23
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,2
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
|
||||
<link href='http://example.com/2' type='text/html' rel='alternate'/>
|
||||
<title>Venus</title>
|
||||
<content>the Jewel of the Sky</content>
|
||||
<updated>2006-02-02T00:00:00Z</updated>
|
||||
<published>2006-01-02T00:00:00Z</published>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>one</planet:name>
|
||||
</source>
|
||||
</entry>
|
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,3
vendored
Normal file
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,3
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
|
||||
<link href='http://example.com/3' type='text/html' rel='alternate'/>
|
||||
<title>Earth</title>
|
||||
<content>the Blue Planet</content>
|
||||
<updated>2006-01-03T00:00:00Z</updated>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>one</planet:name>
|
||||
</source>
|
||||
</entry>
|
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,4
vendored
Normal file
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed1,4
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
|
||||
<link href='http://example.com/4' type='text/html' rel='alternate'/>
|
||||
<title>Mars</title>
|
||||
<content>the Red Planet</content>
|
||||
<updated>2006-01-04T00:00:00Z</updated>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>one</planet:name>
|
||||
</source>
|
||||
</entry>
|
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,1
vendored
Normal file
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,1
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/1</id>
|
||||
<link href='http://example.com/1' type='text/html' rel='alternate'/>
|
||||
<title>Mercury</title>
|
||||
<content>Messenger of the Roman Gods</content>
|
||||
<updated>2006-01-01T00:00:00Z</updated>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>two</planet:name>
|
||||
</source>
|
||||
</entry>
|
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,2
vendored
Normal file
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,2
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/2</id>
|
||||
<link href='http://example.com/2' type='text/html' rel='alternate'/>
|
||||
<title>Venus</title>
|
||||
<content>the Morning Star</content>
|
||||
<updated>2006-01-02T00:00:00Z</updated>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>two</planet:name>
|
||||
</source>
|
||||
</entry>
|
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,3
vendored
Normal file
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,3
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/3</id>
|
||||
<link href='http://example.com/3' type='text/html' rel='alternate'/>
|
||||
<title>Earth</title>
|
||||
<content>the Blue Planet</content>
|
||||
<updated>2006-01-03T00:00:00Z</updated>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>two</planet:name>
|
||||
</source>
|
||||
</entry>
|
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,4
vendored
Normal file
22
tests/data/splice/cache/tag:planet.intertwingly.net,2006:testfeed2,4
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2/4</id>
|
||||
<link href='http://example.com/4' type='text/html' rel='alternate'/>
|
||||
<title>Mars</title>
|
||||
<content>the Red Planet</content>
|
||||
<updated>2006-01-04T00:00:00Z</updated>
|
||||
<source>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>two</planet:name>
|
||||
</source>
|
||||
</entry>
|
11
tests/data/splice/config.ini
Normal file
11
tests/data/splice/config.ini
Normal file
@ -0,0 +1,11 @@
|
||||
[Planet]
|
||||
name = test planet
|
||||
cache_directory = tests/data/splice/cache
|
||||
template_files =
|
||||
|
||||
[tests/data/spider/testfeed1b.atom]
|
||||
name = one
|
||||
|
||||
[tests/data/spider/testfeed2.atom]
|
||||
name = two
|
||||
|
52
tests/test_config.py
Normal file
52
tests/test_config.py
Normal file
@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest
|
||||
from planet import config
|
||||
|
||||
workdir = 'tests/work/spider/cache'
|
||||
|
||||
class ConfigTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
config.load('tests/data/config/basic.ini')
|
||||
|
||||
# administrivia
|
||||
|
||||
def test_template(self):
|
||||
self.assertEqual(['index.html.tmpl', 'atom.xml.tmpl'],
|
||||
config.template_files())
|
||||
|
||||
def test_feeds(self):
|
||||
self.assertEqual(['feed1', 'feed2'], config.feeds())
|
||||
|
||||
# planet wide configuration
|
||||
|
||||
def test_name(self):
|
||||
self.assertEqual('Test Configuration', config.name())
|
||||
|
||||
def test_link(self):
|
||||
self.assertEqual('Unconfigured Planet', config.link())
|
||||
|
||||
# per template configuration
|
||||
|
||||
def test_days_per_page(self):
|
||||
self.assertEqual(7, config.days_per_page('index.html.tmpl'))
|
||||
self.assertEqual(0, config.days_per_page('atom.xml.tmpl'))
|
||||
|
||||
def test_items_per_page(self):
|
||||
self.assertEqual(50, config.items_per_page('index.html.tmpl'))
|
||||
self.assertEqual(50, config.items_per_page('atom.xml.tmpl'))
|
||||
|
||||
def test_encoding(self):
|
||||
self.assertEqual('utf-8', config.encoding('index.html.tmpl'))
|
||||
self.assertEqual('utf-8', config.encoding('atom.xml.tmpl'))
|
||||
|
||||
# dictionaries
|
||||
|
||||
def test_feed_options(self):
|
||||
self.assertEqual('one', config.feed_options('feed1')['name'])
|
||||
self.assertEqual('two', config.feed_options('feed2')['name'])
|
||||
|
||||
def test_template_options(self):
|
||||
option = config.template_options('index.html.tmpl')
|
||||
self.assertEqual('7', option['days_per_page'])
|
||||
self.assertEqual('50', option['items_per_page'])
|
37
tests/test_reconstitute.py
Normal file
37
tests/test_reconstitute.py
Normal file
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest, os, sys, glob, new, re, StringIO, time
|
||||
from planet import feedparser
|
||||
from planet.reconstitute import reconstitute
|
||||
|
||||
testfiles = 'tests/data/reconstitute/%s.xml'
|
||||
|
||||
class ReconstituteTest(unittest.TestCase):
|
||||
desc_re = re.compile("Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->")
|
||||
|
||||
def eval(self, name):
|
||||
# read the test case
|
||||
try:
|
||||
testcase = open(testfiles % name)
|
||||
data = testcase.read()
|
||||
description, expect = self.desc_re.search(data).groups()
|
||||
testcase.close()
|
||||
except:
|
||||
raise RuntimeError, "can't parse %s" % name
|
||||
|
||||
# parse and reconstitute to a string
|
||||
work = StringIO.StringIO()
|
||||
results = feedparser.parse(data)
|
||||
reconstitute(results, results.entries[0]).writexml(work)
|
||||
|
||||
# verify the results
|
||||
results = feedparser.parse(work.getvalue().encode('utf-8'))
|
||||
self.assertFalse(results.bozo, 'xml is well formed')
|
||||
self.assertTrue(eval(expect, results.entries[0]), expect)
|
||||
|
||||
# build a test method for each test file
|
||||
for testcase in glob.glob(testfiles % '*'):
|
||||
root = os.path.splitext(os.path.basename(testcase))[0]
|
||||
func = lambda self, name=root: self.eval(name)
|
||||
method = new.instancemethod(func, None, ReconstituteTest)
|
||||
setattr(ReconstituteTest, "test_" + root, method)
|
65
tests/test_spider.py
Normal file
65
tests/test_spider.py
Normal file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest, os, glob, calendar
|
||||
from planet.spider import filename, spiderFeed, spiderPlanet
|
||||
from planet import feedparser, config
|
||||
|
||||
workdir = 'tests/work/spider/cache'
|
||||
testfeed = 'tests/data/spider/testfeed%s.atom'
|
||||
configfile = 'tests/data/spider/config.ini'
|
||||
|
||||
class SpiderTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
try:
|
||||
os.makedirs(workdir)
|
||||
except:
|
||||
self.tearDown()
|
||||
os.makedirs(workdir)
|
||||
|
||||
def tearDown(self):
|
||||
for file in glob.glob(workdir+"/*"):
|
||||
os.unlink(file)
|
||||
os.removedirs(workdir)
|
||||
|
||||
def test_filename(self):
|
||||
self.assertEqual('./example.com,index.html',
|
||||
filename('.', 'http://example.com/index.html'))
|
||||
self.assertEqual('./www.xn--8ws00zhy3a.com',
|
||||
filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))
|
||||
|
||||
def test_spiderFeed(self):
|
||||
config.load(configfile)
|
||||
spiderFeed(testfeed % '1b')
|
||||
files = glob.glob(workdir+"/*")
|
||||
|
||||
# verify that exactly four files were produced
|
||||
self.assertEqual(4, len(files))
|
||||
|
||||
# verify that the file names are as expected
|
||||
self.assertTrue(workdir +
|
||||
'/tag:planet.intertwingly.net,2006:testfeed1,1' in files)
|
||||
|
||||
# verify that the file timestamps match atom:updated
|
||||
for file in files:
|
||||
data = feedparser.parse(file)
|
||||
self.assertTrue(data.entries[0].source.planet_name)
|
||||
self.assertEqual(os.stat(file).st_mtime,
|
||||
calendar.timegm(data.entries[0].updated_parsed))
|
||||
|
||||
def test_spiderUpdate(self):
|
||||
spiderFeed(testfeed % '1a')
|
||||
self.test_spiderFeed()
|
||||
|
||||
def test_spiderPlanet(self):
|
||||
spiderPlanet(configfile)
|
||||
files = glob.glob(workdir+"/*")
|
||||
|
||||
# verify that exactly eight files were produced
|
||||
self.assertEqual(12, len(files))
|
||||
|
||||
# verify that the file names are as expected
|
||||
self.assertTrue(workdir +
|
||||
'/tag:planet.intertwingly.net,2006:testfeed1,1' in files)
|
||||
self.assertTrue(workdir +
|
||||
'/tag:planet.intertwingly.net,2006:testfeed2,1' in files)
|
||||
|
17
tests/test_splice.py
Normal file
17
tests/test_splice.py
Normal file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest
|
||||
from planet.splice import splice
|
||||
|
||||
configfile = 'tests/data/splice/config.ini'
|
||||
|
||||
class SpliceTest(unittest.TestCase):
|
||||
|
||||
def test_splice(self):
|
||||
doc = splice(configfile)
|
||||
self.assertEqual(8,len(doc.getElementsByTagName('entry')))
|
||||
self.assertEqual(2,len(doc.getElementsByTagName('planet:subscription')))
|
||||
self.assertEqual(10,len(doc.getElementsByTagName('planet:name')))
|
||||
|
||||
self.assertEqual('test planet',
|
||||
doc.getElementsByTagName('title')[0].firstChild.nodeValue)
|
Loading…
x
Reference in New Issue
Block a user