Updates from Sam Ruby.
This commit is contained in:
commit
2deb9bcf3d
1
THANKS
1
THANKS
@ -3,6 +3,7 @@ Mary Gardiner - PythonPath
|
||||
Elias Torres - FOAF OnlineAccounts
|
||||
Jacques Distler - Template patches
|
||||
Michael Koziarski - HTTP Auth fix
|
||||
Brian Ewins - Win32 / Portalocker
|
||||
|
||||
This codebase represents a radical refactoring of Planet 2.0, which lists
|
||||
the following contributors:
|
||||
|
@ -66,6 +66,9 @@ except:
|
||||
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
|
||||
name2codepoint[name]=ord(codepoint)
|
||||
|
||||
# python 2.2 support
|
||||
if not hasattr(__builtins__, 'basestring'): basestring=str
|
||||
|
||||
# This RE makes Beautiful Soup able to parse XML with namespaces.
|
||||
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
|
||||
|
||||
@ -821,7 +824,8 @@ class SoupStrainer:
|
||||
def _matches(self, markup, matchAgainst):
|
||||
#print "Matching %s against %s" % (markup, matchAgainst)
|
||||
result = False
|
||||
if matchAgainst == True and type(matchAgainst) == types.BooleanType:
|
||||
if matchAgainst == True and (not hasattr(types, 'BooleanType') or
|
||||
type(matchAgainst) == types.BooleanType):
|
||||
result = markup != None
|
||||
elif callable(matchAgainst):
|
||||
result = matchAgainst(markup)
|
||||
@ -869,7 +873,7 @@ def isString(s):
|
||||
"""Convenience method that works with all 2.x versions of Python
|
||||
to determine whether or not something is stringlike."""
|
||||
try:
|
||||
return isinstance(s, unicode) or isintance(s, basestring)
|
||||
return isinstance(s, unicode) or isinstance(s, basestring)
|
||||
except NameError:
|
||||
return isinstance(s, str)
|
||||
|
||||
@ -1284,6 +1288,12 @@ class BeautifulStoneSoup(Tag, SGMLParser):
|
||||
j = i + len(toHandle)
|
||||
return j
|
||||
|
||||
def convert_charref(self, name):
|
||||
return '&#%s;' % name
|
||||
|
||||
def convert_entityref(self, name):
|
||||
return '&%s;' % name
|
||||
|
||||
class BeautifulSoup(BeautifulStoneSoup):
|
||||
|
||||
"""This parser knows the following facts about HTML:
|
||||
@ -1654,6 +1664,8 @@ class UnicodeDammit:
|
||||
'''Given a string and its encoding, decodes the string into Unicode.
|
||||
%encoding is a string recognized by encodings.aliases'''
|
||||
|
||||
if not data: return u''
|
||||
|
||||
# strip Byte Order Mark (if present)
|
||||
if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
|
||||
and (data[2:4] != '\x00\x00'):
|
||||
|
@ -67,8 +67,8 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
|
||||
options = {}
|
||||
|
||||
# add original options
|
||||
for key, value in orig_config.items(list):
|
||||
options[key] = value
|
||||
for key in orig_config.options(list):
|
||||
options[key] = orig_config.get(list, key)
|
||||
|
||||
try:
|
||||
if use_cache:
|
||||
@ -85,7 +85,13 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
|
||||
cached_config.set(list, key, value)
|
||||
|
||||
# read list
|
||||
base = urljoin('file:', os.path.abspath(os.path.curdir))
|
||||
curdir=getattr(os.path, 'curdir', '.')
|
||||
if sys.platform.find('win') < 0:
|
||||
base = urljoin('file:', os.path.abspath(curdir))
|
||||
else:
|
||||
path = os.path.abspath(os.path.curdir)
|
||||
base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
|
||||
|
||||
request = urllib2.Request(urljoin(base + '/', list))
|
||||
if options.has_key("etag"):
|
||||
request.add_header('If-None-Match', options['etag'])
|
||||
|
@ -149,11 +149,14 @@ def load(config_file):
|
||||
config.template_directories()]
|
||||
|
||||
# merge configurations, allowing current one to override theme
|
||||
template_files = config.template_files()
|
||||
parser.read(config_file)
|
||||
for file in config.bill_of_materials():
|
||||
if not file in bom: bom.append(file)
|
||||
parser.set('Planet', 'bill_of_materials', ' '.join(bom))
|
||||
parser.set('Planet', 'template_directories', ' '.join(dirs))
|
||||
parser.set('Planet', 'template_files',
|
||||
' '.join(template_files + config.template_files()))
|
||||
break
|
||||
else:
|
||||
log.error('Unable to find theme %s', theme)
|
||||
|
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
|
||||
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
||||
"""
|
||||
|
||||
__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs"
|
||||
__version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
|
||||
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
@ -130,6 +130,18 @@ try:
|
||||
except:
|
||||
chardet = None
|
||||
|
||||
# reversible htmlentitydefs mappings for Python 2.2
|
||||
try:
|
||||
from htmlentitydefs import name2codepoint, codepoint2name
|
||||
except:
|
||||
import htmlentitydefs
|
||||
name2codepoint={}
|
||||
codepoint2name={}
|
||||
for (name,codepoint) in htmlentitydefs.entitydefs.iteritems():
|
||||
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
|
||||
name2codepoint[name]=ord(codepoint)
|
||||
codepoint2name[ord(codepoint)]=name
|
||||
|
||||
# BeautifulSoup parser used for parsing microformats from embedded HTML content
|
||||
# http://www.crummy.com/software/BeautifulSoup/. At the moment, it appears
|
||||
# that there is a version incompatibility, so the import is replaced with
|
||||
@ -574,20 +586,9 @@ class _FeedParserMixin:
|
||||
if text.startswith('&#') and text.endswith(';'):
|
||||
return self.handle_entityref(text)
|
||||
else:
|
||||
# entity resolution graciously donated by Aaron Swartz
|
||||
def name2cp(k):
|
||||
import htmlentitydefs
|
||||
if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
|
||||
return htmlentitydefs.name2codepoint[k]
|
||||
k = htmlentitydefs.entitydefs[k]
|
||||
if k.startswith('&#x') and k.endswith(';'):
|
||||
return int(k[3:-1],16) # not in latin-1
|
||||
if k.startswith('&#') and k.endswith(';'):
|
||||
return int(k[2:-1]) # not in latin-1
|
||||
return ord(k)
|
||||
try: name2cp(ref)
|
||||
try: name2codepoint[ref]
|
||||
except KeyError: text = '&%s;' % ref
|
||||
else: text = unichr(name2cp(ref)).encode('utf-8')
|
||||
else: text = unichr(name2codepoint[ref]).encode('utf-8')
|
||||
self.elementstack[-1][2].append(text)
|
||||
|
||||
def handle_data(self, text, escape=1):
|
||||
@ -672,9 +673,9 @@ class _FeedParserMixin:
|
||||
# only if all the remaining content is nested underneath it.
|
||||
# This means that the divs would be retained in the following:
|
||||
# <div>foo</div><div>bar</div>
|
||||
if pieces and len(pieces)>1 and not pieces[-1].strip():
|
||||
while pieces and len(pieces)>1 and not pieces[-1].strip():
|
||||
del pieces[-1]
|
||||
if pieces and len(pieces)>1 and not pieces[0].strip():
|
||||
while pieces and len(pieces)>1 and not pieces[0].strip():
|
||||
del pieces[0]
|
||||
if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>':
|
||||
depth = 0
|
||||
@ -1521,6 +1522,11 @@ if _XML_AVAILABLE:
|
||||
|
||||
if prefix:
|
||||
localname = prefix.lower() + ':' + localname
|
||||
elif namespace and not qname: #Expat
|
||||
for name,value in self.namespacesInUse.items():
|
||||
if name and value == namespace:
|
||||
localname = name + ':' + localname
|
||||
break
|
||||
if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
|
||||
|
||||
for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
|
||||
@ -1546,6 +1552,11 @@ if _XML_AVAILABLE:
|
||||
prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
|
||||
if prefix:
|
||||
localname = prefix + ':' + localname
|
||||
elif namespace and not qname: #Expat
|
||||
for name,value in self.namespacesInUse.items():
|
||||
if name and value == namespace:
|
||||
localname = name + ':' + localname
|
||||
break
|
||||
localname = str(localname).lower()
|
||||
self.unknown_endtag(localname)
|
||||
|
||||
@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
|
||||
def handle_entityref(self, ref):
|
||||
# called for each entity reference, e.g. for '&copy;', ref will be 'copy'
|
||||
# Reconstruct the original entity reference.
|
||||
import htmlentitydefs
|
||||
if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref):
|
||||
if name2codepoint.has_key(ref):
|
||||
self.pieces.append('&%(ref)s;' % locals())
|
||||
else:
|
||||
self.pieces.append('&%(ref)s' % locals())
|
||||
@ -1705,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
|
||||
# self.updatepos(declstartpos, i)
|
||||
return None, -1
|
||||
|
||||
def convert_charref(self, name):
|
||||
return '&#%s;' % name
|
||||
|
||||
def convert_entityref(self, name):
|
||||
return '&%s;' % name
|
||||
|
||||
def output(self):
|
||||
'''Return processed HTML as a single string'''
|
||||
return ''.join([str(p) for p in self.pieces])
|
||||
|
@ -44,6 +44,7 @@ import cgi # for HTML escaping of variables
|
||||
import urllib # for URL escaping of variables
|
||||
import cPickle # for template compilation
|
||||
import gettext
|
||||
import portalocker # for locking
|
||||
|
||||
INCLUDE_DIR = "inc"
|
||||
|
||||
@ -57,25 +58,6 @@ PARAM_ESCAPE = 2
|
||||
PARAM_GLOBAL = 3
|
||||
PARAM_GETTEXT_STRING = 1
|
||||
|
||||
# Find a way to lock files. Currently implemented only for UNIX and windows.
|
||||
LOCKTYPE_FCNTL = 1
|
||||
LOCKTYPE_MSVCRT = 2
|
||||
LOCKTYPE = None
|
||||
try:
|
||||
import fcntl
|
||||
except:
|
||||
try:
|
||||
import msvcrt
|
||||
except:
|
||||
LOCKTYPE = None
|
||||
else:
|
||||
LOCKTYPE = LOCKTYPE_MSVCRT
|
||||
else:
|
||||
LOCKTYPE = LOCKTYPE_FCNTL
|
||||
LOCK_EX = 1
|
||||
LOCK_SH = 2
|
||||
LOCK_UN = 3
|
||||
|
||||
##############################################
|
||||
# CLASS: TemplateManager #
|
||||
##############################################
|
||||
@ -129,13 +111,6 @@ class TemplateManager:
|
||||
|
||||
The <em>TemplateError</em> exception is raised when the precompiled
|
||||
template cannot be saved. Precompilation is enabled by default.
|
||||
|
||||
Precompilation is available only on UNIX and Windows platforms,
|
||||
because proper file locking which is necessary to ensure
|
||||
multitask safe behaviour is platform specific and is not
|
||||
implemented for other platforms. Attempts to enable precompilation
|
||||
on the other platforms result in raise of the
|
||||
<em>TemplateError</em> exception.
|
||||
|
||||
@param comments Enable or disable template comments.
|
||||
This optional parameter can be used to enable or disable
|
||||
@ -159,13 +134,6 @@ class TemplateManager:
|
||||
self._gettext = gettext
|
||||
self._debug = debug
|
||||
|
||||
# Find what module to use to lock files.
|
||||
# File locking is necessary for the 'precompile' feature to be
|
||||
# multitask/thread safe. Currently it works only on UNIX
|
||||
# and Windows. Anyone willing to implement it on Mac ?
|
||||
if precompile and not LOCKTYPE:
|
||||
raise TemplateError, "Template precompilation is not "\
|
||||
"available on this platform."
|
||||
self.DEB("INIT DONE")
|
||||
|
||||
def prepare(self, file):
|
||||
@ -260,33 +228,6 @@ class TemplateManager:
|
||||
"""
|
||||
if self._debug: print >> sys.stderr, str
|
||||
|
||||
def lock_file(self, file, lock):
|
||||
""" Provide platform independent file locking.
|
||||
@hidden
|
||||
"""
|
||||
fd = file.fileno()
|
||||
if LOCKTYPE == LOCKTYPE_FCNTL:
|
||||
if lock == LOCK_SH:
|
||||
fcntl.flock(fd, fcntl.LOCK_SH)
|
||||
elif lock == LOCK_EX:
|
||||
fcntl.flock(fd, fcntl.LOCK_EX)
|
||||
elif lock == LOCK_UN:
|
||||
fcntl.flock(fd, fcntl.LOCK_UN)
|
||||
else:
|
||||
raise TemplateError, "BUG: bad lock in lock_file"
|
||||
elif LOCKTYPE == LOCKTYPE_MSVCRT:
|
||||
if lock == LOCK_SH:
|
||||
# msvcrt does not support shared locks :-(
|
||||
msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
|
||||
elif lock == LOCK_EX:
|
||||
msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
|
||||
elif lock == LOCK_UN:
|
||||
msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
raise TemplateError, "BUG: bad lock in lock_file"
|
||||
else:
|
||||
raise TemplateError, "BUG: bad locktype in lock_file"
|
||||
|
||||
def compile(self, file):
|
||||
""" Compile the template.
|
||||
@hidden
|
||||
@ -322,7 +263,7 @@ class TemplateManager:
|
||||
file = None
|
||||
try:
|
||||
file = open(filename, "rb")
|
||||
self.lock_file(file, LOCK_SH)
|
||||
portalocker.lock(file, portalocker.LOCK_SH)
|
||||
precompiled = cPickle.load(file)
|
||||
except IOError, (errno, errstr):
|
||||
raise TemplateError, "IO error in load precompiled "\
|
||||
@ -338,7 +279,7 @@ class TemplateManager:
|
||||
return precompiled
|
||||
finally:
|
||||
if file:
|
||||
self.lock_file(file, LOCK_UN)
|
||||
portalocker.unlock(file)
|
||||
file.close()
|
||||
if remove_bad and os.path.isfile(filename):
|
||||
# X: We may lose the original exception here, raising OSError.
|
||||
@ -369,7 +310,7 @@ class TemplateManager:
|
||||
file = None
|
||||
try:
|
||||
file = open(filename, "wb") # may truncate existing file
|
||||
self.lock_file(file, LOCK_EX)
|
||||
portalocker.lock(file, portalocker.LOCK_EX)
|
||||
BINARY = 1
|
||||
READABLE = 0
|
||||
if self._debug:
|
||||
@ -393,7 +334,7 @@ class TemplateManager:
|
||||
self.DEB("SAVING PRECOMPILED")
|
||||
finally:
|
||||
if file:
|
||||
self.lock_file(file, LOCK_UN)
|
||||
portalocker.unlock(file)
|
||||
file.close()
|
||||
if remove_bad and os.path.isfile(filename):
|
||||
# X: We may lose the original exception here, raising OSError.
|
||||
|
93
planet/portalocker.py
Normal file
93
planet/portalocker.py
Normal file
@ -0,0 +1,93 @@
|
||||
# portalocker.py - Cross-platform (posix/nt) API for flock-style file locking.
|
||||
# Requires python 1.5.2 or better.
|
||||
# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203/index_txt
|
||||
# Except where otherwise noted, recipes in the Python Cookbook are
|
||||
# published under the Python license.
|
||||
|
||||
"""Cross-platform (posix/nt) API for flock-style file locking.
|
||||
|
||||
Synopsis:
|
||||
|
||||
import portalocker
|
||||
file = open("somefile", "r+")
|
||||
portalocker.lock(file, portalocker.LOCK_EX)
|
||||
file.seek(12)
|
||||
file.write("foo")
|
||||
file.close()
|
||||
|
||||
If you know what you're doing, you may choose to
|
||||
|
||||
portalocker.unlock(file)
|
||||
|
||||
before closing the file, but why?
|
||||
|
||||
Methods:
|
||||
|
||||
lock( file, flags )
|
||||
unlock( file )
|
||||
|
||||
Constants:
|
||||
|
||||
LOCK_EX
|
||||
LOCK_SH
|
||||
LOCK_NB
|
||||
|
||||
I learned the win32 technique for locking files from sample code
|
||||
provided by John Nielsen <nielsenjf@my-deja.com> in the documentation
|
||||
that accompanies the win32 modules.
|
||||
|
||||
Author: Jonathan Feinberg <jdf@pobox.com>
|
||||
Version: $Id: portalocker.py,v 1.3 2001/05/29 18:47:55 Administrator Exp $
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
if os.name == 'nt':
|
||||
import win32con
|
||||
import win32file
|
||||
import pywintypes
|
||||
LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
|
||||
LOCK_SH = 0 # the default
|
||||
LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
|
||||
# is there any reason not to reuse the following structure?
|
||||
__overlapped = pywintypes.OVERLAPPED()
|
||||
elif os.name == 'posix':
|
||||
import fcntl
|
||||
LOCK_EX = fcntl.LOCK_EX
|
||||
LOCK_SH = fcntl.LOCK_SH
|
||||
LOCK_NB = fcntl.LOCK_NB
|
||||
else:
|
||||
raise RuntimeError("PortaLocker only defined for nt and posix platforms")
|
||||
|
||||
if os.name == 'nt':
|
||||
def lock(file, flags):
|
||||
hfile = win32file._get_osfhandle(file.fileno())
|
||||
win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
|
||||
|
||||
def unlock(file):
|
||||
hfile = win32file._get_osfhandle(file.fileno())
|
||||
win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
|
||||
|
||||
elif os.name =='posix':
|
||||
def lock(file, flags):
|
||||
fcntl.flock(file.fileno(), flags)
|
||||
|
||||
def unlock(file):
|
||||
fcntl.flock(file.fileno(), fcntl.LOCK_UN)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from time import time, strftime, localtime
|
||||
import sys
|
||||
import portalocker
|
||||
|
||||
log = open('log.txt', "a+")
|
||||
portalocker.lock(log, portalocker.LOCK_EX)
|
||||
|
||||
timestamp = strftime("%m/%d/%Y %H:%M:%S\n", localtime(time()))
|
||||
log.write( timestamp )
|
||||
|
||||
print "Wrote lines. Hit enter to release lock."
|
||||
dummy = sys.stdin.readline()
|
||||
|
||||
log.close()
|
||||
|
@ -162,7 +162,7 @@ def content(xentry, name, detail, bozo):
|
||||
|
||||
xentry.appendChild(xcontent)
|
||||
|
||||
def source(xsource, source, bozo):
|
||||
def source(xsource, source, bozo, format):
|
||||
""" copy source information to the entry """
|
||||
xdoc = xsource.ownerDocument
|
||||
|
||||
@ -193,6 +193,9 @@ def source(xsource, source, bozo):
|
||||
if key.startswith('planet_'):
|
||||
createTextElement(xsource, key.replace('_',':',1), value)
|
||||
|
||||
createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false')
|
||||
createTextElement(xsource, 'planet:format', format)
|
||||
|
||||
def reconstitute(feed, entry):
|
||||
""" create an entry document from a parsed feed """
|
||||
xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
|
||||
@ -222,7 +225,7 @@ def reconstitute(feed, entry):
|
||||
author(xentry, 'contributor', contributor)
|
||||
|
||||
xsource = xdoc.createElement('source')
|
||||
source(xsource, entry.get('source', feed.feed), bozo)
|
||||
source(xsource, entry.get('source', feed.feed), bozo, feed.version)
|
||||
xentry.appendChild(xsource)
|
||||
|
||||
return xdoc
|
||||
|
@ -2,6 +2,8 @@ import planet
|
||||
import os
|
||||
import sys
|
||||
|
||||
logged_modes = []
|
||||
|
||||
def run(template_file, doc, mode='template'):
|
||||
""" select a template module based on file extension and execute it """
|
||||
log = planet.getLogger(planet.config.log_level())
|
||||
@ -16,7 +18,14 @@ def run(template_file, doc, mode='template'):
|
||||
template_resolved = os.path.join(template_dir, template_file)
|
||||
if os.path.exists(template_resolved): break
|
||||
else:
|
||||
return log.error("Unable to locate %s %s", mode, template_file)
|
||||
log.error("Unable to locate %s %s", mode, template_file)
|
||||
if not mode in logged_modes:
|
||||
log.info("%s search path:", mode)
|
||||
for template_dir in dirs:
|
||||
log.info(" %s", os.path.realpath(template_dir))
|
||||
logged_modes.append(mode)
|
||||
return
|
||||
template_resolved = os.path.realpath(template_resolved)
|
||||
|
||||
# Add shell directory to the path, if not already there
|
||||
shellpath = os.path.join(sys.path[0],'planet','shell')
|
||||
@ -34,13 +43,11 @@ def run(template_file, doc, mode='template'):
|
||||
|
||||
# Execute the shell module
|
||||
options = planet.config.template_options(template_file)
|
||||
log.debug("Processing %s %s using %s", mode,
|
||||
os.path.realpath(template_resolved), module_name)
|
||||
if mode == 'filter':
|
||||
log.debug("Processing filer %s using %s", template_resolved,
|
||||
module_name)
|
||||
return module.run(template_resolved, doc, None, options)
|
||||
else:
|
||||
log.info("Processing template %s using %s", template_resolved,
|
||||
module_name)
|
||||
output_dir = planet.config.output_dir()
|
||||
output_file = os.path.join(output_dir, base)
|
||||
module.run(template_resolved, doc, output_file, options)
|
||||
|
@ -116,6 +116,9 @@ def spiderFeed(feed):
|
||||
data = feedparser.parse(feed_info.feed.get('planet_http_location',feed),
|
||||
etag=feed_info.feed.get('planet_http_etag',None), modified=modified)
|
||||
|
||||
# if read failed, retain cached information
|
||||
if not data.version and feed_info.version: data.feed = feed_info.feed
|
||||
|
||||
# capture http status
|
||||
if not data.has_key("status"):
|
||||
if data.has_key("entries") and len(data.entries)>0:
|
||||
@ -166,32 +169,6 @@ def spiderFeed(feed):
|
||||
{'rel':'self', 'type':'application/atom+xml', 'href':feed}))
|
||||
for name, value in config.feed_options(feed).items():
|
||||
data.feed['planet_'+name] = value
|
||||
|
||||
# identify inactive feeds
|
||||
if config.activity_threshold(feed):
|
||||
activity_horizon = \
|
||||
time.gmtime(time.time()-86400*config.activity_threshold(feed))
|
||||
updated = [entry.updated_parsed for entry in data.entries
|
||||
if entry.has_key('updated_parsed')]
|
||||
updated.sort()
|
||||
if not updated or updated[-1] < activity_horizon:
|
||||
msg = "no activity in %d days" % config.activity_threshold(feed)
|
||||
log.info(msg)
|
||||
data.feed['planet_message'] = msg
|
||||
|
||||
# report channel level errors
|
||||
if data.status == 403:
|
||||
data.feed['planet_message'] = "403: forbidden"
|
||||
elif data.status == 404:
|
||||
data.feed['planet_message'] = "404: not found"
|
||||
elif data.status == 408:
|
||||
data.feed['planet_message'] = "408: request timeout"
|
||||
elif data.status == 410:
|
||||
data.feed['planet_message'] = "410: gone"
|
||||
elif data.status == 500:
|
||||
data.feed['planet_message'] = "internal server error"
|
||||
elif data.status >= 400:
|
||||
data.feed['planet_message'] = "http status %s" % data.status
|
||||
|
||||
# perform user configured scrub operations on the data
|
||||
scrub(feed, data)
|
||||
@ -233,12 +210,38 @@ def spiderFeed(feed):
|
||||
# write out and timestamp the results
|
||||
write(output, cache_file)
|
||||
os.utime(cache_file, (mtime, mtime))
|
||||
|
||||
# identify inactive feeds
|
||||
if config.activity_threshold(feed):
|
||||
activity_horizon = \
|
||||
time.gmtime(time.time()-86400*config.activity_threshold(feed))
|
||||
updated = [entry.updated_parsed for entry in data.entries
|
||||
if entry.has_key('updated_parsed')]
|
||||
updated.sort()
|
||||
if not updated or updated[-1] < activity_horizon:
|
||||
msg = "no activity in %d days" % config.activity_threshold(feed)
|
||||
log.info(msg)
|
||||
data.feed['planet_message'] = msg
|
||||
|
||||
# report channel level errors
|
||||
if data.status == 403:
|
||||
data.feed['planet_message'] = "403: forbidden"
|
||||
elif data.status == 404:
|
||||
data.feed['planet_message'] = "404: not found"
|
||||
elif data.status == 408:
|
||||
data.feed['planet_message'] = "408: request timeout"
|
||||
elif data.status == 410:
|
||||
data.feed['planet_message'] = "410: gone"
|
||||
elif data.status == 500:
|
||||
data.feed['planet_message'] = "internal server error"
|
||||
elif data.status >= 400:
|
||||
data.feed['planet_message'] = "http status %s" % data.status
|
||||
|
||||
# write the feed info to the cache
|
||||
if not os.path.exists(sources): os.makedirs(sources)
|
||||
xdoc=minidom.parseString('''<feed xmlns:planet="%s"
|
||||
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
|
||||
reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
|
||||
reconstitute.source(xdoc.documentElement,data.feed,data.bozo,data.version)
|
||||
write(xdoc.toxml('utf-8'), filename(sources, feed))
|
||||
xdoc.unlink()
|
||||
|
||||
|
@ -65,7 +65,7 @@ def splice():
|
||||
if not data.feed: continue
|
||||
xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
|
||||
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
|
||||
reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
|
||||
reconstitute.source(xdoc.documentElement, data.feed, None, None)
|
||||
feed.appendChild(xdoc.documentElement)
|
||||
|
||||
return doc
|
||||
|
@ -21,6 +21,10 @@ sys.path[0] = os.getcwd()
|
||||
# find all of the planet test modules
|
||||
modules = map(fullmodname, glob.glob(os.path.join('tests', 'test_*.py')))
|
||||
|
||||
# enable warnings
|
||||
import planet
|
||||
planet.getLogger("WARNING")
|
||||
|
||||
# load all of the tests into a suite
|
||||
suite = unittest.TestLoader().loadTestsFromNames(modules)
|
||||
|
||||
|
8
tests/data/reconstitute/source_bozo.xml
Normal file
8
tests/data/reconstitute/source_bozo.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<!--
|
||||
Description: id
|
||||
Expect: source.planet_bozo == 'false'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry/>
|
||||
</feed>
|
8
tests/data/reconstitute/source_format.xml
Normal file
8
tests/data/reconstitute/source_format.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<!--
|
||||
Description: id
|
||||
Expect: source.planet_format == 'atom10'
|
||||
-->
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry/>
|
||||
</feed>
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest, os, shutil
|
||||
from planet import config, splice
|
||||
from planet import config, splice, logger
|
||||
from xml.dom import minidom
|
||||
|
||||
workdir = 'tests/work/apply'
|
||||
@ -32,7 +32,7 @@ class ApplyTest(unittest.TestCase):
|
||||
for file in ['index.html', 'default.css', 'images/foaf.png']:
|
||||
path = os.path.join(workdir, file)
|
||||
self.assertTrue(os.path.exists(path))
|
||||
self.assertTrue(os.stat(path).st_size > 0)
|
||||
self.assertTrue(os.stat(path).st_size > 0, file + ' has size 0')
|
||||
|
||||
# verify that index.html is well formed, has content, and xml:lang
|
||||
html = open(os.path.join(workdir, 'index.html'))
|
||||
@ -62,3 +62,26 @@ class ApplyTest(unittest.TestCase):
|
||||
self.assertTrue(html.find('<h1>test planet</h1>')>=0)
|
||||
self.assertTrue(html.find(
|
||||
'<h4><a href="http://example.com/2">Venus</a></h4>')>=0)
|
||||
|
||||
try:
|
||||
import libxml2
|
||||
except ImportError:
|
||||
|
||||
try:
|
||||
import win32pipe
|
||||
(stdin,stdout) = win32pipe.popen4('xsltproc -V', 't')
|
||||
stdin.close()
|
||||
stdout.read()
|
||||
try:
|
||||
exitcode = stdout.close()
|
||||
except IOError:
|
||||
exitcode = -1
|
||||
except:
|
||||
import commands
|
||||
(exitstatus,output) = commands.getstatusoutput('xsltproc -V')
|
||||
exitcode = ((exitstatus>>8) & 0xFF)
|
||||
|
||||
if exitcode:
|
||||
logger.warn("xsltproc is not available => can't test XSLT templates")
|
||||
for method in dir(ApplyTest):
|
||||
if method.startswith('test_'): delattr(ApplyTest,method)
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest, xml.dom.minidom
|
||||
from planet import shell, config
|
||||
from planet import shell, config, logger
|
||||
|
||||
class FilterTests(unittest.TestCase):
|
||||
|
||||
@ -80,7 +80,10 @@ try:
|
||||
from subprocess import Popen, PIPE
|
||||
sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
|
||||
sed.communicate()
|
||||
if sed.returncode != 0: raise Exception
|
||||
except:
|
||||
# sed is not available
|
||||
del FilterTests.test_stripAd_yahoo
|
||||
if sed.returncode != 0:
|
||||
logger.warn("sed is not available => can't test stripAd_yahoo")
|
||||
del FilterTests.test_stripAd_yahoo
|
||||
except ImportError:
|
||||
logger.warn("Popen is not available => can't test filters")
|
||||
for method in dir(FilterTests):
|
||||
if method.startswith('test_'): delattr(FilterTests,method)
|
||||
|
@ -3,7 +3,7 @@
|
||||
import unittest, os, shutil
|
||||
from planet.foaf import foaf2config
|
||||
from ConfigParser import ConfigParser
|
||||
from planet import config
|
||||
from planet import config, logger
|
||||
|
||||
workdir = 'tests/work/config/cache'
|
||||
|
||||
@ -119,6 +119,7 @@ class FoafTest(unittest.TestCase):
|
||||
try:
|
||||
import RDF
|
||||
except:
|
||||
logger.warn("Redland RDF is not available => can't test FOAF reading lists")
|
||||
for key in FoafTest.__dict__.keys():
|
||||
if key.startswith('test_'): delattr(FoafTest, key)
|
||||
|
||||
|
@ -6,7 +6,7 @@ from os.path import split
|
||||
from glob import glob
|
||||
from ConfigParser import ConfigParser
|
||||
|
||||
workdir = 'tests/work/config/cache'
|
||||
workdir = os.path.join('tests', 'work', 'config', 'cache')
|
||||
|
||||
class ReadingListTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@ -38,7 +38,7 @@ class ReadingListTest(unittest.TestCase):
|
||||
|
||||
def test_cache(self):
|
||||
cache = glob(os.path.join(workdir,'lists','*'))
|
||||
self.assertTrue(1,len(cache))
|
||||
self.assertEqual(1,len(cache))
|
||||
|
||||
parser = ConfigParser()
|
||||
parser.read(cache[0])
|
||||
|
@ -26,11 +26,13 @@ class SpiderTest(unittest.TestCase):
|
||||
os.removedirs(os.path.split(workdir)[0])
|
||||
|
||||
def test_filename(self):
|
||||
self.assertEqual('./example.com,index.html',
|
||||
self.assertEqual(os.path.join('.', 'example.com,index.html'),
|
||||
filename('.', 'http://example.com/index.html'))
|
||||
self.assertEqual('./planet.intertwingly.net,2006,testfeed1,1',
|
||||
self.assertEqual(os.path.join('.',
|
||||
'planet.intertwingly.net,2006,testfeed1,1'),
|
||||
filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1'))
|
||||
self.assertEqual('./00000000-0000-0000-0000-000000000000',
|
||||
self.assertEqual(os.path.join('.',
|
||||
'00000000-0000-0000-0000-000000000000'),
|
||||
filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000'))
|
||||
|
||||
# Requires Python 2.3
|
||||
@ -38,7 +40,7 @@ class SpiderTest(unittest.TestCase):
|
||||
import encodings.idna
|
||||
except:
|
||||
return
|
||||
self.assertEqual('./xn--8ws00zhy3a.com',
|
||||
self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'),
|
||||
filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))
|
||||
|
||||
def test_spiderFeed(self):
|
||||
@ -51,8 +53,8 @@ class SpiderTest(unittest.TestCase):
|
||||
self.assertEqual(5, len(files))
|
||||
|
||||
# verify that the file names are as expected
|
||||
self.assertTrue(workdir +
|
||||
'/planet.intertwingly.net,2006,testfeed1,1' in files)
|
||||
self.assertTrue(os.path.join(workdir,
|
||||
'planet.intertwingly.net,2006,testfeed1,1') in files)
|
||||
|
||||
# verify that the file timestamps match atom:updated
|
||||
data = feedparser.parse(files[2])
|
||||
@ -73,10 +75,10 @@ class SpiderTest(unittest.TestCase):
|
||||
self.assertEqual(13, len(files))
|
||||
|
||||
# verify that the file names are as expected
|
||||
self.assertTrue(workdir +
|
||||
'/planet.intertwingly.net,2006,testfeed1,1' in files)
|
||||
self.assertTrue(workdir +
|
||||
'/planet.intertwingly.net,2006,testfeed2,1' in files)
|
||||
self.assertTrue(os.path.join(workdir,
|
||||
'planet.intertwingly.net,2006,testfeed1,1') in files)
|
||||
self.assertTrue(os.path.join(workdir,
|
||||
'planet.intertwingly.net,2006,testfeed2,1') in files)
|
||||
|
||||
data = feedparser.parse(workdir +
|
||||
'/planet.intertwingly.net,2006,testfeed3,1')
|
||||
|
Loading…
x
Reference in New Issue
Block a user