Updates from Sam Ruby.

This commit is contained in:
Jacques Distler 2006-09-24 15:43:19 -05:00
commit 2deb9bcf3d
19 changed files with 276 additions and 142 deletions

1
THANKS
View File

@ -3,6 +3,7 @@ Mary Gardiner - PythonPath
Elias Torres - FOAF OnlineAccounts Elias Torres - FOAF OnlineAccounts
Jacques Distler - Template patches Jacques Distler - Template patches
Michael Koziarski - HTTP Auth fix Michael Koziarski - HTTP Auth fix
Brian Ewins - Win32 / Portalocker
This codebase represents a radical refactoring of Planet 2.0, which lists This codebase represents a radical refactoring of Planet 2.0, which lists
the following contributors: the following contributors:

View File

@ -66,6 +66,9 @@ except:
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
name2codepoint[name]=ord(codepoint) name2codepoint[name]=ord(codepoint)
# python 2.2 support
if not hasattr(__builtins__, 'basestring'): basestring=str
# This RE makes Beautiful Soup able to parse XML with namespaces. # This RE makes Beautiful Soup able to parse XML with namespaces.
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
@ -821,7 +824,8 @@ class SoupStrainer:
def _matches(self, markup, matchAgainst): def _matches(self, markup, matchAgainst):
#print "Matching %s against %s" % (markup, matchAgainst) #print "Matching %s against %s" % (markup, matchAgainst)
result = False result = False
if matchAgainst == True and type(matchAgainst) == types.BooleanType: if matchAgainst == True and (not hasattr(types, 'BooleanType') or
type(matchAgainst) == types.BooleanType):
result = markup != None result = markup != None
elif callable(matchAgainst): elif callable(matchAgainst):
result = matchAgainst(markup) result = matchAgainst(markup)
@ -869,7 +873,7 @@ def isString(s):
"""Convenience method that works with all 2.x versions of Python """Convenience method that works with all 2.x versions of Python
to determine whether or not something is stringlike.""" to determine whether or not something is stringlike."""
try: try:
return isinstance(s, unicode) or isintance(s, basestring) return isinstance(s, unicode) or isinstance(s, basestring)
except NameError: except NameError:
return isinstance(s, str) return isinstance(s, str)
@ -1284,6 +1288,12 @@ class BeautifulStoneSoup(Tag, SGMLParser):
j = i + len(toHandle) j = i + len(toHandle)
return j return j
def convert_charref(self, name):
return '&#%s;' % name
def convert_entityref(self, name):
return '&%s;' % name
class BeautifulSoup(BeautifulStoneSoup): class BeautifulSoup(BeautifulStoneSoup):
"""This parser knows the following facts about HTML: """This parser knows the following facts about HTML:
@ -1654,6 +1664,8 @@ class UnicodeDammit:
'''Given a string and its encoding, decodes the string into Unicode. '''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases''' %encoding is a string recognized by encodings.aliases'''
if not data: return u''
# strip Byte Order Mark (if present) # strip Byte Order Mark (if present)
if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
and (data[2:4] != '\x00\x00'): and (data[2:4] != '\x00\x00'):

View File

@ -67,8 +67,8 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
options = {} options = {}
# add original options # add original options
for key, value in orig_config.items(list): for key in orig_config.options(list):
options[key] = value options[key] = orig_config.get(list, key)
try: try:
if use_cache: if use_cache:
@ -85,7 +85,13 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
cached_config.set(list, key, value) cached_config.set(list, key, value)
# read list # read list
base = urljoin('file:', os.path.abspath(os.path.curdir)) curdir=getattr(os.path, 'curdir', '.')
if sys.platform.find('win') < 0:
base = urljoin('file:', os.path.abspath(curdir))
else:
path = os.path.abspath(os.path.curdir)
base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
request = urllib2.Request(urljoin(base + '/', list)) request = urllib2.Request(urljoin(base + '/', list))
if options.has_key("etag"): if options.has_key("etag"):
request.add_header('If-None-Match', options['etag']) request.add_header('If-None-Match', options['etag'])

View File

@ -149,11 +149,14 @@ def load(config_file):
config.template_directories()] config.template_directories()]
# merge configurations, allowing current one to override theme # merge configurations, allowing current one to override theme
template_files = config.template_files()
parser.read(config_file) parser.read(config_file)
for file in config.bill_of_materials(): for file in config.bill_of_materials():
if not file in bom: bom.append(file) if not file in bom: bom.append(file)
parser.set('Planet', 'bill_of_materials', ' '.join(bom)) parser.set('Planet', 'bill_of_materials', ' '.join(bom))
parser.set('Planet', 'template_directories', ' '.join(dirs)) parser.set('Planet', 'template_directories', ' '.join(dirs))
parser.set('Planet', 'template_files',
' '.join(template_files + config.template_files()))
break break
else: else:
log.error('Unable to find theme %s', theme) log.error('Unable to find theme %s', theme)

View File

@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/> Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
""" """
__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs" __version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification, Redistribution and use in source and binary forms, with or without modification,
@ -130,6 +130,18 @@ try:
except: except:
chardet = None chardet = None
# reversible htmlentitydefs mappings for Python 2.2
try:
from htmlentitydefs import name2codepoint, codepoint2name
except:
import htmlentitydefs
name2codepoint={}
codepoint2name={}
for (name,codepoint) in htmlentitydefs.entitydefs.iteritems():
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
name2codepoint[name]=ord(codepoint)
codepoint2name[ord(codepoint)]=name
# BeautifulSoup parser used for parsing microformats from embedded HTML content # BeautifulSoup parser used for parsing microformats from embedded HTML content
# http://www.crummy.com/software/BeautifulSoup/. At the moment, it appears # http://www.crummy.com/software/BeautifulSoup/. At the moment, it appears
# that there is a version incompatibility, so the import is replaced with # that there is a version incompatibility, so the import is replaced with
@ -574,20 +586,9 @@ class _FeedParserMixin:
if text.startswith('&#') and text.endswith(';'): if text.startswith('&#') and text.endswith(';'):
return self.handle_entityref(text) return self.handle_entityref(text)
else: else:
# entity resolution graciously donated by Aaron Swartz try: name2codepoint[ref]
def name2cp(k):
import htmlentitydefs
if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
return htmlentitydefs.name2codepoint[k]
k = htmlentitydefs.entitydefs[k]
if k.startswith('&#x') and k.endswith(';'):
return int(k[3:-1],16) # not in latin-1
if k.startswith('&#') and k.endswith(';'):
return int(k[2:-1]) # not in latin-1
return ord(k)
try: name2cp(ref)
except KeyError: text = '&%s;' % ref except KeyError: text = '&%s;' % ref
else: text = unichr(name2cp(ref)).encode('utf-8') else: text = unichr(name2codepoint[ref]).encode('utf-8')
self.elementstack[-1][2].append(text) self.elementstack[-1][2].append(text)
def handle_data(self, text, escape=1): def handle_data(self, text, escape=1):
@ -672,9 +673,9 @@ class _FeedParserMixin:
# only if all the remaining content is nested underneath it. # only if all the remaining content is nested underneath it.
# This means that the divs would be retained in the following: # This means that the divs would be retained in the following:
# <div>foo</div><div>bar</div> # <div>foo</div><div>bar</div>
if pieces and len(pieces)>1 and not pieces[-1].strip(): while pieces and len(pieces)>1 and not pieces[-1].strip():
del pieces[-1] del pieces[-1]
if pieces and len(pieces)>1 and not pieces[0].strip(): while pieces and len(pieces)>1 and not pieces[0].strip():
del pieces[0] del pieces[0]
if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>': if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>':
depth = 0 depth = 0
@ -1521,6 +1522,11 @@ if _XML_AVAILABLE:
if prefix: if prefix:
localname = prefix.lower() + ':' + localname localname = prefix.lower() + ':' + localname
elif namespace and not qname: #Expat
for name,value in self.namespacesInUse.items():
if name and value == namespace:
localname = name + ':' + localname
break
if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
@ -1546,6 +1552,11 @@ if _XML_AVAILABLE:
prefix = self._matchnamespaces.get(lowernamespace, givenprefix) prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
if prefix: if prefix:
localname = prefix + ':' + localname localname = prefix + ':' + localname
elif namespace and not qname: #Expat
for name,value in self.namespacesInUse.items():
if name and value == namespace:
localname = name + ':' + localname
break
localname = str(localname).lower() localname = str(localname).lower()
self.unknown_endtag(localname) self.unknown_endtag(localname)
@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
def handle_entityref(self, ref): def handle_entityref(self, ref):
# called for each entity reference, e.g. for '&copy;', ref will be 'copy' # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
# Reconstruct the original entity reference. # Reconstruct the original entity reference.
import htmlentitydefs if name2codepoint.has_key(ref):
if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref):
self.pieces.append('&%(ref)s;' % locals()) self.pieces.append('&%(ref)s;' % locals())
else: else:
self.pieces.append('&amp;%(ref)s' % locals()) self.pieces.append('&amp;%(ref)s' % locals())
@ -1705,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
# self.updatepos(declstartpos, i) # self.updatepos(declstartpos, i)
return None, -1 return None, -1
def convert_charref(self, name):
return '&#%s;' % name
def convert_entityref(self, name):
return '&%s;' % name
def output(self): def output(self):
'''Return processed HTML as a single string''' '''Return processed HTML as a single string'''
return ''.join([str(p) for p in self.pieces]) return ''.join([str(p) for p in self.pieces])

View File

@ -44,6 +44,7 @@ import cgi # for HTML escaping of variables
import urllib # for URL escaping of variables import urllib # for URL escaping of variables
import cPickle # for template compilation import cPickle # for template compilation
import gettext import gettext
import portalocker # for locking
INCLUDE_DIR = "inc" INCLUDE_DIR = "inc"
@ -57,25 +58,6 @@ PARAM_ESCAPE = 2
PARAM_GLOBAL = 3 PARAM_GLOBAL = 3
PARAM_GETTEXT_STRING = 1 PARAM_GETTEXT_STRING = 1
# Find a way to lock files. Currently implemented only for UNIX and windows.
LOCKTYPE_FCNTL = 1
LOCKTYPE_MSVCRT = 2
LOCKTYPE = None
try:
import fcntl
except:
try:
import msvcrt
except:
LOCKTYPE = None
else:
LOCKTYPE = LOCKTYPE_MSVCRT
else:
LOCKTYPE = LOCKTYPE_FCNTL
LOCK_EX = 1
LOCK_SH = 2
LOCK_UN = 3
############################################## ##############################################
# CLASS: TemplateManager # # CLASS: TemplateManager #
############################################## ##############################################
@ -130,13 +112,6 @@ class TemplateManager:
The <em>TemplateError</em>exception is raised when the precompiled The <em>TemplateError</em>exception is raised when the precompiled
template cannot be saved. Precompilation is enabled by default. template cannot be saved. Precompilation is enabled by default.
Precompilation is available only on UNIX and Windows platforms,
because proper file locking which is necessary to ensure
multitask safe behaviour is platform specific and is not
implemented for other platforms. Attempts to enable precompilation
on the other platforms result in raise of the
<em>TemplateError</em> exception.
@param comments Enable or disable template comments. @param comments Enable or disable template comments.
This optional parameter can be used to enable or disable This optional parameter can be used to enable or disable
template comments. template comments.
@ -159,13 +134,6 @@ class TemplateManager:
self._gettext = gettext self._gettext = gettext
self._debug = debug self._debug = debug
# Find what module to use to lock files.
# File locking is necessary for the 'precompile' feature to be
# multitask/thread safe. Currently it works only on UNIX
# and Windows. Anyone willing to implement it on Mac ?
if precompile and not LOCKTYPE:
raise TemplateError, "Template precompilation is not "\
"available on this platform."
self.DEB("INIT DONE") self.DEB("INIT DONE")
def prepare(self, file): def prepare(self, file):
@ -260,33 +228,6 @@ class TemplateManager:
""" """
if self._debug: print >> sys.stderr, str if self._debug: print >> sys.stderr, str
def lock_file(self, file, lock):
""" Provide platform independent file locking.
@hidden
"""
fd = file.fileno()
if LOCKTYPE == LOCKTYPE_FCNTL:
if lock == LOCK_SH:
fcntl.flock(fd, fcntl.LOCK_SH)
elif lock == LOCK_EX:
fcntl.flock(fd, fcntl.LOCK_EX)
elif lock == LOCK_UN:
fcntl.flock(fd, fcntl.LOCK_UN)
else:
raise TemplateError, "BUG: bad lock in lock_file"
elif LOCKTYPE == LOCKTYPE_MSVCRT:
if lock == LOCK_SH:
# msvcrt does not support shared locks :-(
msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
elif lock == LOCK_EX:
msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
elif lock == LOCK_UN:
msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
else:
raise TemplateError, "BUG: bad lock in lock_file"
else:
raise TemplateError, "BUG: bad locktype in lock_file"
def compile(self, file): def compile(self, file):
""" Compile the template. """ Compile the template.
@hidden @hidden
@ -322,7 +263,7 @@ class TemplateManager:
file = None file = None
try: try:
file = open(filename, "rb") file = open(filename, "rb")
self.lock_file(file, LOCK_SH) portalocker.lock(file, portalocker.LOCK_SH)
precompiled = cPickle.load(file) precompiled = cPickle.load(file)
except IOError, (errno, errstr): except IOError, (errno, errstr):
raise TemplateError, "IO error in load precompiled "\ raise TemplateError, "IO error in load precompiled "\
@ -338,7 +279,7 @@ class TemplateManager:
return precompiled return precompiled
finally: finally:
if file: if file:
self.lock_file(file, LOCK_UN) portalocker.unlock(file)
file.close() file.close()
if remove_bad and os.path.isfile(filename): if remove_bad and os.path.isfile(filename):
# X: We may lose the original exception here, raising OSError. # X: We may lose the original exception here, raising OSError.
@ -369,7 +310,7 @@ class TemplateManager:
file = None file = None
try: try:
file = open(filename, "wb") # may truncate existing file file = open(filename, "wb") # may truncate existing file
self.lock_file(file, LOCK_EX) portalocker.lock(file, portalocker.LOCK_EX)
BINARY = 1 BINARY = 1
READABLE = 0 READABLE = 0
if self._debug: if self._debug:
@ -393,7 +334,7 @@ class TemplateManager:
self.DEB("SAVING PRECOMPILED") self.DEB("SAVING PRECOMPILED")
finally: finally:
if file: if file:
self.lock_file(file, LOCK_UN) portalocker.unlock(file)
file.close() file.close()
if remove_bad and os.path.isfile(filename): if remove_bad and os.path.isfile(filename):
# X: We may lose the original exception here, raising OSError. # X: We may lose the original exception here, raising OSError.

93
planet/portalocker.py Normal file
View File

@ -0,0 +1,93 @@
# portalocker.py - Cross-platform (posix/nt) API for flock-style file locking.
# Requires python 1.5.2 or better.
# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203/index_txt
# Except where otherwise noted, recipes in the Python Cookbook are
# published under the Python license.
"""Cross-platform (posix/nt) API for flock-style file locking.
Synopsis:
import portalocker
file = open("somefile", "r+")
portalocker.lock(file, portalocker.LOCK_EX)
file.seek(12)
file.write("foo")
file.close()
If you know what you're doing, you may choose to
portalocker.unlock(file)
before closing the file, but why?
Methods:
lock( file, flags )
unlock( file )
Constants:
LOCK_EX
LOCK_SH
LOCK_NB
I learned the win32 technique for locking files from sample code
provided by John Nielsen <nielsenjf@my-deja.com> in the documentation
that accompanies the win32 modules.
Author: Jonathan Feinberg <jdf@pobox.com>
Version: $Id: portalocker.py,v 1.3 2001/05/29 18:47:55 Administrator Exp $
"""
import os
# Pick the locking backend for the running platform and publish its flag
# values under the portable names LOCK_EX, LOCK_SH and LOCK_NB.
if os.name == 'posix':
    import fcntl
    # fcntl's own flag constants are re-exported unchanged.
    LOCK_SH = fcntl.LOCK_SH
    LOCK_EX = fcntl.LOCK_EX
    LOCK_NB = fcntl.LOCK_NB
elif os.name == 'nt':
    import win32con
    import win32file
    import pywintypes
    LOCK_SH = 0 # the default
    LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
    LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
    # is there any reason not to reuse the following structure?
    __overlapped = pywintypes.OVERLAPPED()
else:
    # Any other platform has no backend here, so importing the module fails.
    raise RuntimeError("PortaLocker only defined for nt and posix platforms")
# Define lock()/unlock() against whichever backend this platform provides.
if os.name == 'posix':
    def lock(file, flags):
        """Lock the open file object `file`.

        `flags` is handed to fcntl.flock unchanged, together with the
        file's descriptor.
        """
        descriptor = file.fileno()
        fcntl.flock(descriptor, flags)

    def unlock(file):
        """Release the lock on the open file object `file` (LOCK_UN)."""
        descriptor = file.fileno()
        fcntl.flock(descriptor, fcntl.LOCK_UN)
elif os.name == 'nt':
    def lock(file, flags):
        """Lock the open file object `file` through win32file.LockFileEx.

        `flags` is handed to LockFileEx unchanged; the call uses the
        module-level __overlapped structure.
        """
        handle = win32file._get_osfhandle(file.fileno())
        win32file.LockFileEx(handle, flags, 0, -0x10000, __overlapped)

    def unlock(file):
        """Unlock the open file object `file` via win32file.UnlockFileEx."""
        handle = win32file._get_osfhandle(file.fileno())
        win32file.UnlockFileEx(handle, 0, -0x10000, __overlapped)
if __name__ == '__main__':
    # Manual demonstration: take an exclusive lock on log.txt, append a
    # timestamp, and keep the file open until the user presses enter.
    from time import time, strftime, localtime
    import sys
    import portalocker

    log = open('log.txt', "a+")
    try:
        portalocker.lock(log, portalocker.LOCK_EX)
        timestamp = strftime("%m/%d/%Y %H:%M:%S\n", localtime(time()))
        log.write( timestamp )

        # The single-argument parenthesised form prints the same text
        # whether print is a statement or a function.
        print("Wrote lines. Hit enter to release lock.")
        sys.stdin.readline()
    finally:
        # Always close the file, even if locking or writing raised; the
        # demo relies on close (not an explicit unlock) to end the lock.
        log.close()

View File

@ -162,7 +162,7 @@ def content(xentry, name, detail, bozo):
xentry.appendChild(xcontent) xentry.appendChild(xcontent)
def source(xsource, source, bozo): def source(xsource, source, bozo, format):
""" copy source information to the entry """ """ copy source information to the entry """
xdoc = xsource.ownerDocument xdoc = xsource.ownerDocument
@ -193,6 +193,9 @@ def source(xsource, source, bozo):
if key.startswith('planet_'): if key.startswith('planet_'):
createTextElement(xsource, key.replace('_',':',1), value) createTextElement(xsource, key.replace('_',':',1), value)
createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false')
createTextElement(xsource, 'planet:format', format)
def reconstitute(feed, entry): def reconstitute(feed, entry):
""" create an entry document from a parsed feed """ """ create an entry document from a parsed feed """
xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n') xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
@ -222,7 +225,7 @@ def reconstitute(feed, entry):
author(xentry, 'contributor', contributor) author(xentry, 'contributor', contributor)
xsource = xdoc.createElement('source') xsource = xdoc.createElement('source')
source(xsource, entry.get('source', feed.feed), bozo) source(xsource, entry.get('source', feed.feed), bozo, feed.version)
xentry.appendChild(xsource) xentry.appendChild(xsource)
return xdoc return xdoc

View File

@ -2,6 +2,8 @@ import planet
import os import os
import sys import sys
logged_modes = []
def run(template_file, doc, mode='template'): def run(template_file, doc, mode='template'):
""" select a template module based on file extension and execute it """ """ select a template module based on file extension and execute it """
log = planet.getLogger(planet.config.log_level()) log = planet.getLogger(planet.config.log_level())
@ -16,7 +18,14 @@ def run(template_file, doc, mode='template'):
template_resolved = os.path.join(template_dir, template_file) template_resolved = os.path.join(template_dir, template_file)
if os.path.exists(template_resolved): break if os.path.exists(template_resolved): break
else: else:
return log.error("Unable to locate %s %s", mode, template_file) log.error("Unable to locate %s %s", mode, template_file)
if not mode in logged_modes:
log.info("%s search path:", mode)
for template_dir in dirs:
log.info(" %s", os.path.realpath(template_dir))
logged_modes.append(mode)
return
template_resolved = os.path.realpath(template_resolved)
# Add shell directory to the path, if not already there # Add shell directory to the path, if not already there
shellpath = os.path.join(sys.path[0],'planet','shell') shellpath = os.path.join(sys.path[0],'planet','shell')
@ -34,13 +43,11 @@ def run(template_file, doc, mode='template'):
# Execute the shell module # Execute the shell module
options = planet.config.template_options(template_file) options = planet.config.template_options(template_file)
log.debug("Processing %s %s using %s", mode,
os.path.realpath(template_resolved), module_name)
if mode == 'filter': if mode == 'filter':
log.debug("Processing filer %s using %s", template_resolved,
module_name)
return module.run(template_resolved, doc, None, options) return module.run(template_resolved, doc, None, options)
else: else:
log.info("Processing template %s using %s", template_resolved,
module_name)
output_dir = planet.config.output_dir() output_dir = planet.config.output_dir()
output_file = os.path.join(output_dir, base) output_file = os.path.join(output_dir, base)
module.run(template_resolved, doc, output_file, options) module.run(template_resolved, doc, output_file, options)

View File

@ -116,6 +116,9 @@ def spiderFeed(feed):
data = feedparser.parse(feed_info.feed.get('planet_http_location',feed), data = feedparser.parse(feed_info.feed.get('planet_http_location',feed),
etag=feed_info.feed.get('planet_http_etag',None), modified=modified) etag=feed_info.feed.get('planet_http_etag',None), modified=modified)
# if read failed, retain cached information
if not data.version and feed_info.version: data.feed = feed_info.feed
# capture http status # capture http status
if not data.has_key("status"): if not data.has_key("status"):
if data.has_key("entries") and len(data.entries)>0: if data.has_key("entries") and len(data.entries)>0:
@ -167,32 +170,6 @@ def spiderFeed(feed):
for name, value in config.feed_options(feed).items(): for name, value in config.feed_options(feed).items():
data.feed['planet_'+name] = value data.feed['planet_'+name] = value
# identify inactive feeds
if config.activity_threshold(feed):
activity_horizon = \
time.gmtime(time.time()-86400*config.activity_threshold(feed))
updated = [entry.updated_parsed for entry in data.entries
if entry.has_key('updated_parsed')]
updated.sort()
if not updated or updated[-1] < activity_horizon:
msg = "no activity in %d days" % config.activity_threshold(feed)
log.info(msg)
data.feed['planet_message'] = msg
# report channel level errors
if data.status == 403:
data.feed['planet_message'] = "403: forbidden"
elif data.status == 404:
data.feed['planet_message'] = "404: not found"
elif data.status == 408:
data.feed['planet_message'] = "408: request timeout"
elif data.status == 410:
data.feed['planet_message'] = "410: gone"
elif data.status == 500:
data.feed['planet_message'] = "internal server error"
elif data.status >= 400:
data.feed['planet_message'] = "http status %s" % data.status
# perform user configured scrub operations on the data # perform user configured scrub operations on the data
scrub(feed, data) scrub(feed, data)
@ -234,11 +211,37 @@ def spiderFeed(feed):
write(output, cache_file) write(output, cache_file)
os.utime(cache_file, (mtime, mtime)) os.utime(cache_file, (mtime, mtime))
# identify inactive feeds
if config.activity_threshold(feed):
activity_horizon = \
time.gmtime(time.time()-86400*config.activity_threshold(feed))
updated = [entry.updated_parsed for entry in data.entries
if entry.has_key('updated_parsed')]
updated.sort()
if not updated or updated[-1] < activity_horizon:
msg = "no activity in %d days" % config.activity_threshold(feed)
log.info(msg)
data.feed['planet_message'] = msg
# report channel level errors
if data.status == 403:
data.feed['planet_message'] = "403: forbidden"
elif data.status == 404:
data.feed['planet_message'] = "404: not found"
elif data.status == 408:
data.feed['planet_message'] = "408: request timeout"
elif data.status == 410:
data.feed['planet_message'] = "410: gone"
elif data.status == 500:
data.feed['planet_message'] = "internal server error"
elif data.status >= 400:
data.feed['planet_message'] = "http status %s" % data.status
# write the feed info to the cache # write the feed info to the cache
if not os.path.exists(sources): os.makedirs(sources) if not os.path.exists(sources): os.makedirs(sources)
xdoc=minidom.parseString('''<feed xmlns:planet="%s" xdoc=minidom.parseString('''<feed xmlns:planet="%s"
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns) xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
reconstitute.source(xdoc.documentElement, data.feed, data.bozo) reconstitute.source(xdoc.documentElement,data.feed,data.bozo,data.version)
write(xdoc.toxml('utf-8'), filename(sources, feed)) write(xdoc.toxml('utf-8'), filename(sources, feed))
xdoc.unlink() xdoc.unlink()

View File

@ -65,7 +65,7 @@ def splice():
if not data.feed: continue if not data.feed: continue
xdoc=minidom.parseString('''<planet:source xmlns:planet="%s" xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns) xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
reconstitute.source(xdoc.documentElement, data.feed, data.bozo) reconstitute.source(xdoc.documentElement, data.feed, None, None)
feed.appendChild(xdoc.documentElement) feed.appendChild(xdoc.documentElement)
return doc return doc

View File

@ -21,6 +21,10 @@ sys.path[0] = os.getcwd()
# find all of the planet test modules # find all of the planet test modules
modules = map(fullmodname, glob.glob(os.path.join('tests', 'test_*.py'))) modules = map(fullmodname, glob.glob(os.path.join('tests', 'test_*.py')))
# enable warnings
import planet
planet.getLogger("WARNING")
# load all of the tests into a suite # load all of the tests into a suite
suite = unittest.TestLoader().loadTestsFromNames(modules) suite = unittest.TestLoader().loadTestsFromNames(modules)

View File

@ -0,0 +1,8 @@
<!--
Description: id
Expect: source.planet_bozo == 'false'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry/>
</feed>

View File

@ -0,0 +1,8 @@
<!--
Description: id
Expect: source.planet_format == 'atom10'
-->
<feed xmlns="http://www.w3.org/2005/Atom">
<entry/>
</feed>

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import unittest, os, shutil import unittest, os, shutil
from planet import config, splice from planet import config, splice, logger
from xml.dom import minidom from xml.dom import minidom
workdir = 'tests/work/apply' workdir = 'tests/work/apply'
@ -32,7 +32,7 @@ class ApplyTest(unittest.TestCase):
for file in ['index.html', 'default.css', 'images/foaf.png']: for file in ['index.html', 'default.css', 'images/foaf.png']:
path = os.path.join(workdir, file) path = os.path.join(workdir, file)
self.assertTrue(os.path.exists(path)) self.assertTrue(os.path.exists(path))
self.assertTrue(os.stat(path).st_size > 0) self.assertTrue(os.stat(path).st_size > 0, file + ' has size 0')
# verify that index.html is well formed, has content, and xml:lang # verify that index.html is well formed, has content, and xml:lang
html = open(os.path.join(workdir, 'index.html')) html = open(os.path.join(workdir, 'index.html'))
@ -62,3 +62,26 @@ class ApplyTest(unittest.TestCase):
self.assertTrue(html.find('<h1>test planet</h1>')>=0) self.assertTrue(html.find('<h1>test planet</h1>')>=0)
self.assertTrue(html.find( self.assertTrue(html.find(
'<h4><a href="http://example.com/2">Venus</a></h4>')>=0) '<h4><a href="http://example.com/2">Venus</a></h4>')>=0)
try:
import libxml2
except ImportError:
try:
import win32pipe
(stdin,stdout) = win32pipe.popen4('xsltproc -V', 't')
stdin.close()
stdout.read()
try:
exitcode = stdout.close()
except IOError:
exitcode = -1
except:
import commands
(exitstatus,output) = commands.getstatusoutput('xsltproc -V')
exitcode = ((exitstatus>>8) & 0xFF)
if exitcode:
logger.warn("xsltproc is not available => can't test XSLT templates")
for method in dir(ApplyTest):
if method.startswith('test_'): delattr(ApplyTest,method)

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import unittest, xml.dom.minidom import unittest, xml.dom.minidom
from planet import shell, config from planet import shell, config, logger
class FilterTests(unittest.TestCase): class FilterTests(unittest.TestCase):
@ -80,7 +80,10 @@ try:
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE) sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
sed.communicate() sed.communicate()
if sed.returncode != 0: raise Exception if sed.returncode != 0:
except: logger.warn("sed is not available => can't test stripAd_yahoo")
# sed is not available del FilterTests.test_stripAd_yahoo
del FilterTests.test_stripAd_yahoo except ImportError:
logger.warn("Popen is not available => can't test filters")
for method in dir(FilterTests):
if method.startswith('test_'): delattr(FilterTests,method)

View File

@ -3,7 +3,7 @@
import unittest, os, shutil import unittest, os, shutil
from planet.foaf import foaf2config from planet.foaf import foaf2config
from ConfigParser import ConfigParser from ConfigParser import ConfigParser
from planet import config from planet import config, logger
workdir = 'tests/work/config/cache' workdir = 'tests/work/config/cache'
@ -119,6 +119,7 @@ class FoafTest(unittest.TestCase):
try: try:
import RDF import RDF
except: except:
logger.warn("Redland RDF is not available => can't test FOAF reading lists")
for key in FoafTest.__dict__.keys(): for key in FoafTest.__dict__.keys():
if key.startswith('test_'): delattr(FoafTest, key) if key.startswith('test_'): delattr(FoafTest, key)

View File

@ -6,7 +6,7 @@ from os.path import split
from glob import glob from glob import glob
from ConfigParser import ConfigParser from ConfigParser import ConfigParser
workdir = 'tests/work/config/cache' workdir = os.path.join('tests', 'work', 'config', 'cache')
class ReadingListTest(unittest.TestCase): class ReadingListTest(unittest.TestCase):
def setUp(self): def setUp(self):
@ -38,7 +38,7 @@ class ReadingListTest(unittest.TestCase):
def test_cache(self): def test_cache(self):
cache = glob(os.path.join(workdir,'lists','*')) cache = glob(os.path.join(workdir,'lists','*'))
self.assertTrue(1,len(cache)) self.assertEqual(1,len(cache))
parser = ConfigParser() parser = ConfigParser()
parser.read(cache[0]) parser.read(cache[0])

View File

@ -26,11 +26,13 @@ class SpiderTest(unittest.TestCase):
os.removedirs(os.path.split(workdir)[0]) os.removedirs(os.path.split(workdir)[0])
def test_filename(self): def test_filename(self):
self.assertEqual('./example.com,index.html', self.assertEqual(os.path.join('.', 'example.com,index.html'),
filename('.', 'http://example.com/index.html')) filename('.', 'http://example.com/index.html'))
self.assertEqual('./planet.intertwingly.net,2006,testfeed1,1', self.assertEqual(os.path.join('.',
'planet.intertwingly.net,2006,testfeed1,1'),
filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1')) filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1'))
self.assertEqual('./00000000-0000-0000-0000-000000000000', self.assertEqual(os.path.join('.',
'00000000-0000-0000-0000-000000000000'),
filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000')) filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000'))
# Requires Python 2.3 # Requires Python 2.3
@ -38,7 +40,7 @@ class SpiderTest(unittest.TestCase):
import encodings.idna import encodings.idna
except: except:
return return
self.assertEqual('./xn--8ws00zhy3a.com', self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'),
filename('.', u'http://www.\u8a79\u59c6\u65af.com/')) filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))
def test_spiderFeed(self): def test_spiderFeed(self):
@ -51,8 +53,8 @@ class SpiderTest(unittest.TestCase):
self.assertEqual(5, len(files)) self.assertEqual(5, len(files))
# verify that the file names are as expected # verify that the file names are as expected
self.assertTrue(workdir + self.assertTrue(os.path.join(workdir,
'/planet.intertwingly.net,2006,testfeed1,1' in files) 'planet.intertwingly.net,2006,testfeed1,1') in files)
# verify that the file timestamps match atom:updated # verify that the file timestamps match atom:updated
data = feedparser.parse(files[2]) data = feedparser.parse(files[2])
@ -73,10 +75,10 @@ class SpiderTest(unittest.TestCase):
self.assertEqual(13, len(files)) self.assertEqual(13, len(files))
# verify that the file names are as expected # verify that the file names are as expected
self.assertTrue(workdir + self.assertTrue(os.path.join(workdir,
'/planet.intertwingly.net,2006,testfeed1,1' in files) 'planet.intertwingly.net,2006,testfeed1,1') in files)
self.assertTrue(workdir + self.assertTrue(os.path.join(workdir,
'/planet.intertwingly.net,2006,testfeed2,1' in files) 'planet.intertwingly.net,2006,testfeed2,1') in files)
data = feedparser.parse(workdir + data = feedparser.parse(workdir +
'/planet.intertwingly.net,2006,testfeed3,1') '/planet.intertwingly.net,2006,testfeed3,1')