':
depth = 0
@@ -1521,6 +1522,11 @@ if _XML_AVAILABLE:
if prefix:
localname = prefix.lower() + ':' + localname
+ elif namespace and not qname: #Expat
+ for name,value in self.namespacesInUse.items():
+ if name and value == namespace:
+ localname = name + ':' + localname
+ break
if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
@@ -1546,6 +1552,11 @@ if _XML_AVAILABLE:
prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
if prefix:
localname = prefix + ':' + localname
+ elif namespace and not qname: #Expat
+ for name,value in self.namespacesInUse.items():
+ if name and value == namespace:
+ localname = name + ':' + localname
+ break
localname = str(localname).lower()
self.unknown_endtag(localname)
@@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
def handle_entityref(self, ref):
# called for each entity reference, e.g. for '&copy;', ref will be 'copy'
# Reconstruct the original entity reference.
- import htmlentitydefs
- if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref):
+ if name2codepoint.has_key(ref):
self.pieces.append('&%(ref)s;' % locals())
else:
self.pieces.append('&amp;%(ref)s' % locals())
@@ -1705,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
# self.updatepos(declstartpos, i)
return None, -1
+ def convert_charref(self, name):
+ return '&#%s;' % name
+
+ def convert_entityref(self, name):
+ return '&%s;' % name
+
def output(self):
'''Return processed HTML as a single string'''
return ''.join([str(p) for p in self.pieces])
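
The handle_entityref change above keys off the module-level name2codepoint table, and the two new convert_* hooks tell sgmllib to hand character and entity references back out verbatim. A minimal sketch of the intended lookup behaviour, using a standalone helper (the helper name is made up for illustration; the real logic lives on _BaseHTMLProcessor):

    # Known entities are reconstructed verbatim; unknown ones get their
    # ampersand escaped so the regenerated HTML stays well formed.
    from htmlentitydefs import name2codepoint

    def reconstruct_entityref(ref):
        if name2codepoint.has_key(ref):
            return '&%s;' % ref          # e.g. 'copy' -> '&copy;'
        return '&amp;%s' % ref           # e.g. 'foo'  -> '&amp;foo'

    print reconstruct_entityref('copy')
    print reconstruct_entityref('foo')
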
diff --git a/planet/htmltmpl.py b/planet/htmltmpl.py
index be6e41b..d4fce5f 100644
--- a/planet/htmltmpl.py
+++ b/planet/htmltmpl.py
@@ -44,6 +44,7 @@ import cgi # for HTML escaping of variables
import urllib # for URL escaping of variables
import cPickle # for template compilation
import gettext
+import portalocker # for locking
INCLUDE_DIR = "inc"
@@ -57,25 +58,6 @@ PARAM_ESCAPE = 2
PARAM_GLOBAL = 3
PARAM_GETTEXT_STRING = 1
-# Find a way to lock files. Currently implemented only for UNIX and windows.
-LOCKTYPE_FCNTL = 1
-LOCKTYPE_MSVCRT = 2
-LOCKTYPE = None
-try:
- import fcntl
-except:
- try:
- import msvcrt
- except:
- LOCKTYPE = None
- else:
- LOCKTYPE = LOCKTYPE_MSVCRT
-else:
- LOCKTYPE = LOCKTYPE_FCNTL
-LOCK_EX = 1
-LOCK_SH = 2
-LOCK_UN = 3
-
##############################################
# CLASS: TemplateManager #
##############################################
@@ -129,13 +111,6 @@ class TemplateManager:
The TemplateError exception is raised when the precompiled
template cannot be saved. Precompilation is enabled by default.
-
- Precompilation is available only on UNIX and Windows platforms,
- because proper file locking which is necessary to ensure
- multitask safe behaviour is platform specific and is not
- implemented for other platforms. Attempts to enable precompilation
- on the other platforms result in raise of the
- TemplateError exception.
@param comments Enable or disable template comments.
This optional parameter can be used to enable or disable
@@ -159,13 +134,6 @@ class TemplateManager:
self._gettext = gettext
self._debug = debug
- # Find what module to use to lock files.
- # File locking is necessary for the 'precompile' feature to be
- # multitask/thread safe. Currently it works only on UNIX
- # and Windows. Anyone willing to implement it on Mac ?
- if precompile and not LOCKTYPE:
- raise TemplateError, "Template precompilation is not "\
- "available on this platform."
self.DEB("INIT DONE")
def prepare(self, file):
@@ -260,33 +228,6 @@ class TemplateManager:
"""
if self._debug: print >> sys.stderr, str
- def lock_file(self, file, lock):
- """ Provide platform independent file locking.
- @hidden
- """
- fd = file.fileno()
- if LOCKTYPE == LOCKTYPE_FCNTL:
- if lock == LOCK_SH:
- fcntl.flock(fd, fcntl.LOCK_SH)
- elif lock == LOCK_EX:
- fcntl.flock(fd, fcntl.LOCK_EX)
- elif lock == LOCK_UN:
- fcntl.flock(fd, fcntl.LOCK_UN)
- else:
- raise TemplateError, "BUG: bad lock in lock_file"
- elif LOCKTYPE == LOCKTYPE_MSVCRT:
- if lock == LOCK_SH:
- # msvcrt does not support shared locks :-(
- msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
- elif lock == LOCK_EX:
- msvcrt.locking(fd, msvcrt.LK_LOCK, 1)
- elif lock == LOCK_UN:
- msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
- else:
- raise TemplateError, "BUG: bad lock in lock_file"
- else:
- raise TemplateError, "BUG: bad locktype in lock_file"
-
def compile(self, file):
""" Compile the template.
@hidden
@@ -322,7 +263,7 @@ class TemplateManager:
file = None
try:
file = open(filename, "rb")
- self.lock_file(file, LOCK_SH)
+ portalocker.lock(file, portalocker.LOCK_SH)
precompiled = cPickle.load(file)
except IOError, (errno, errstr):
raise TemplateError, "IO error in load precompiled "\
@@ -338,7 +279,7 @@ class TemplateManager:
return precompiled
finally:
if file:
- self.lock_file(file, LOCK_UN)
+ portalocker.unlock(file)
file.close()
if remove_bad and os.path.isfile(filename):
# X: We may lose the original exception here, raising OSError.
@@ -369,7 +310,7 @@ class TemplateManager:
file = None
try:
file = open(filename, "wb") # may truncate existing file
- self.lock_file(file, LOCK_EX)
+ portalocker.lock(file, portalocker.LOCK_EX)
BINARY = 1
READABLE = 0
if self._debug:
@@ -393,7 +334,7 @@ class TemplateManager:
self.DEB("SAVING PRECOMPILED")
finally:
if file:
- self.lock_file(file, LOCK_UN)
+ portalocker.unlock(file)
file.close()
if remove_bad and os.path.isfile(filename):
# X: We may lose the original exception here, raising OSError.
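
With the LOCKTYPE probing and the lock_file() wrapper gone, precompiled-template I/O leans entirely on the portalocker module introduced below: a shared lock around cPickle.load on the read path, an exclusive lock around the save. A simplified sketch of the save side, assuming that module (not the actual TemplateManager code):

    import cPickle
    import portalocker

    def save_precompiled(filename, template):
        file = open(filename, "wb")          # may truncate an existing file
        try:
            portalocker.lock(file, portalocker.LOCK_EX)
            cPickle.dump(template, file, 1)  # 1 = binary pickle format
        finally:
            portalocker.unlock(file)         # release before closing
            file.close()
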
diff --git a/planet/portalocker.py b/planet/portalocker.py
new file mode 100644
index 0000000..12592a3
--- /dev/null
+++ b/planet/portalocker.py
@@ -0,0 +1,93 @@
+# portalocker.py - Cross-platform (posix/nt) API for flock-style file locking.
+# Requires python 1.5.2 or better.
+# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203/index_txt
+# Except where otherwise noted, recipes in the Python Cookbook are
+# published under the Python license.
+
+"""Cross-platform (posix/nt) API for flock-style file locking.
+
+Synopsis:
+
+ import portalocker
+ file = open("somefile", "r+")
+ portalocker.lock(file, portalocker.LOCK_EX)
+ file.seek(12)
+ file.write("foo")
+ file.close()
+
+If you know what you're doing, you may choose to
+
+ portalocker.unlock(file)
+
+before closing the file, but why?
+
+Methods:
+
+ lock( file, flags )
+ unlock( file )
+
+Constants:
+
+ LOCK_EX
+ LOCK_SH
+ LOCK_NB
+
+I learned the win32 technique for locking files from sample code
+provided by John Nielsen in the documentation
+that accompanies the win32 modules.
+
+Author: Jonathan Feinberg
+Version: $Id: portalocker.py,v 1.3 2001/05/29 18:47:55 Administrator Exp $
+"""
+
+import os
+
+if os.name == 'nt':
+ import win32con
+ import win32file
+ import pywintypes
+ LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
+ LOCK_SH = 0 # the default
+ LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
+ # is there any reason not to reuse the following structure?
+ __overlapped = pywintypes.OVERLAPPED()
+elif os.name == 'posix':
+ import fcntl
+ LOCK_EX = fcntl.LOCK_EX
+ LOCK_SH = fcntl.LOCK_SH
+ LOCK_NB = fcntl.LOCK_NB
+else:
+ raise RuntimeError("PortaLocker only defined for nt and posix platforms")
+
+if os.name == 'nt':
+ def lock(file, flags):
+ hfile = win32file._get_osfhandle(file.fileno())
+ win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
+
+ def unlock(file):
+ hfile = win32file._get_osfhandle(file.fileno())
+ win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
+
+elif os.name =='posix':
+ def lock(file, flags):
+ fcntl.flock(file.fileno(), flags)
+
+ def unlock(file):
+ fcntl.flock(file.fileno(), fcntl.LOCK_UN)
+
+if __name__ == '__main__':
+ from time import time, strftime, localtime
+ import sys
+ import portalocker
+
+ log = open('log.txt', "a+")
+ portalocker.lock(log, portalocker.LOCK_EX)
+
+ timestamp = strftime("%m/%d/%Y %H:%M:%S\n", localtime(time()))
+ log.write( timestamp )
+
+ print "Wrote lines. Hit enter to release lock."
+ dummy = sys.stdin.readline()
+
+ log.close()
+
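
Beyond the synopsis in the docstring, LOCK_NB can be combined with LOCK_EX or LOCK_SH for a non-blocking attempt. A hedged usage sketch; on posix a held lock surfaces as IOError from fcntl.flock, while on nt the failure comes from the win32 layer instead:

    import portalocker

    f = open("cache.dat", "a+")
    try:
        portalocker.lock(f, portalocker.LOCK_EX | portalocker.LOCK_NB)
    except IOError:
        print "lock held elsewhere, skipping update"
    else:
        f.write("exclusive update\n")
        portalocker.unlock(f)
    f.close()
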
diff --git a/planet/reconstitute.py b/planet/reconstitute.py
index 48a0c85..2badc50 100644
--- a/planet/reconstitute.py
+++ b/planet/reconstitute.py
@@ -162,7 +162,7 @@ def content(xentry, name, detail, bozo):
xentry.appendChild(xcontent)
-def source(xsource, source, bozo):
+def source(xsource, source, bozo, format):
""" copy source information to the entry """
xdoc = xsource.ownerDocument
@@ -193,6 +193,9 @@ def source(xsource, source, bozo):
if key.startswith('planet_'):
createTextElement(xsource, key.replace('_',':',1), value)
+ createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false')
+ createTextElement(xsource, 'planet:format', format)
+
def reconstitute(feed, entry):
""" create an entry document from a parsed feed """
xdoc=minidom.parseString('\n')
@@ -222,7 +225,7 @@ def reconstitute(feed, entry):
author(xentry, 'contributor', contributor)
xsource = xdoc.createElement('source')
- source(xsource, entry.get('source', feed.feed), bozo)
+ source(xsource, entry.get('source', feed.feed), bozo, feed.version)
xentry.appendChild(xsource)
return xdoc
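
The extra createTextElement calls record the parser's bozo flag and the feed format (feedparser's version string, e.g. 'atom10') as planet:bozo and planet:format children of the source element. Roughly equivalent minidom calls, with an illustrative helper standing in for createTextElement and the namespace URI shown only for illustration:

    from xml.dom import minidom

    doc = minidom.parseString(
        '<source xmlns:planet="http://planet.intertwingly.net/"/>')
    xsource = doc.documentElement

    def add_text_element(parent, name, value):
        # assumed shape of createTextElement: <name>value</name> under parent
        child = doc.createElement(name)
        child.appendChild(doc.createTextNode(value))
        parent.appendChild(child)

    add_text_element(xsource, 'planet:bozo', 'false')
    add_text_element(xsource, 'planet:format', 'atom10')
    print doc.toxml()
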
diff --git a/planet/shell/__init__.py b/planet/shell/__init__.py
index 72cad5b..bd0e005 100644
--- a/planet/shell/__init__.py
+++ b/planet/shell/__init__.py
@@ -2,6 +2,8 @@ import planet
import os
import sys
+logged_modes = []
+
def run(template_file, doc, mode='template'):
""" select a template module based on file extension and execute it """
log = planet.getLogger(planet.config.log_level())
@@ -16,7 +18,14 @@ def run(template_file, doc, mode='template'):
template_resolved = os.path.join(template_dir, template_file)
if os.path.exists(template_resolved): break
else:
- return log.error("Unable to locate %s %s", mode, template_file)
+ log.error("Unable to locate %s %s", mode, template_file)
+ if not mode in logged_modes:
+ log.info("%s search path:", mode)
+ for template_dir in dirs:
+ log.info(" %s", os.path.realpath(template_dir))
+ logged_modes.append(mode)
+ return
+ template_resolved = os.path.realpath(template_resolved)
# Add shell directory to the path, if not already there
shellpath = os.path.join(sys.path[0],'planet','shell')
@@ -34,13 +43,11 @@ def run(template_file, doc, mode='template'):
# Execute the shell module
options = planet.config.template_options(template_file)
+ log.debug("Processing %s %s using %s", mode,
+ os.path.realpath(template_resolved), module_name)
if mode == 'filter':
- log.debug("Processing filer %s using %s", template_resolved,
- module_name)
return module.run(template_resolved, doc, None, options)
else:
- log.info("Processing template %s using %s", template_resolved,
- module_name)
output_dir = planet.config.output_dir()
output_file = os.path.join(output_dir, base)
module.run(template_resolved, doc, output_file, options)
diff --git a/planet/spider.py b/planet/spider.py
index 5dd7d2d..009d1d0 100644
--- a/planet/spider.py
+++ b/planet/spider.py
@@ -116,6 +116,9 @@ def spiderFeed(feed):
data = feedparser.parse(feed_info.feed.get('planet_http_location',feed),
etag=feed_info.feed.get('planet_http_etag',None), modified=modified)
+ # if read failed, retain cached information
+ if not data.version and feed_info.version: data.feed = feed_info.feed
+
# capture http status
if not data.has_key("status"):
if data.has_key("entries") and len(data.entries)>0:
@@ -166,32 +169,6 @@ def spiderFeed(feed):
{'rel':'self', 'type':'application/atom+xml', 'href':feed}))
for name, value in config.feed_options(feed).items():
data.feed['planet_'+name] = value
-
- # identify inactive feeds
- if config.activity_threshold(feed):
- activity_horizon = \
- time.gmtime(time.time()-86400*config.activity_threshold(feed))
- updated = [entry.updated_parsed for entry in data.entries
- if entry.has_key('updated_parsed')]
- updated.sort()
- if not updated or updated[-1] < activity_horizon:
- msg = "no activity in %d days" % config.activity_threshold(feed)
- log.info(msg)
- data.feed['planet_message'] = msg
-
- # report channel level errors
- if data.status == 403:
- data.feed['planet_message'] = "403: forbidden"
- elif data.status == 404:
- data.feed['planet_message'] = "404: not found"
- elif data.status == 408:
- data.feed['planet_message'] = "408: request timeout"
- elif data.status == 410:
- data.feed['planet_message'] = "410: gone"
- elif data.status == 500:
- data.feed['planet_message'] = "internal server error"
- elif data.status >= 400:
- data.feed['planet_message'] = "http status %s" % data.status
# perform user configured scrub operations on the data
scrub(feed, data)
@@ -233,12 +210,38 @@ def spiderFeed(feed):
# write out and timestamp the results
write(output, cache_file)
os.utime(cache_file, (mtime, mtime))
+
+ # identify inactive feeds
+ if config.activity_threshold(feed):
+ activity_horizon = \
+ time.gmtime(time.time()-86400*config.activity_threshold(feed))
+ updated = [entry.updated_parsed for entry in data.entries
+ if entry.has_key('updated_parsed')]
+ updated.sort()
+ if not updated or updated[-1] < activity_horizon:
+ msg = "no activity in %d days" % config.activity_threshold(feed)
+ log.info(msg)
+ data.feed['planet_message'] = msg
+
+ # report channel level errors
+ if data.status == 403:
+ data.feed['planet_message'] = "403: forbidden"
+ elif data.status == 404:
+ data.feed['planet_message'] = "404: not found"
+ elif data.status == 408:
+ data.feed['planet_message'] = "408: request timeout"
+ elif data.status == 410:
+ data.feed['planet_message'] = "410: gone"
+ elif data.status == 500:
+ data.feed['planet_message'] = "internal server error"
+ elif data.status >= 400:
+ data.feed['planet_message'] = "http status %s" % data.status
# write the feed info to the cache
if not os.path.exists(sources): os.makedirs(sources)
xdoc=minidom.parseString('''\n''' % planet.xmlns)
- reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
+ reconstitute.source(xdoc.documentElement,data.feed,data.bozo,data.version)
write(xdoc.toxml('utf-8'), filename(sources, feed))
xdoc.unlink()
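
The "retain cached information" hunk above keeps the previously cached feed metadata when a refetch yields nothing parseable, so the channel-level messages and the sources cache written at the end of the function still come from known-good data. A hedged sketch of that fallback (names simplified; not the actual spiderFeed code):

    import feedparser

    def refetch(url, cached):
        # 'cached' is the feedparser result saved from the last good pass
        data = feedparser.parse(url)
        if not data.version and cached.version:
            data.feed = cached.feed   # keep title, links, planet_* options
        return data
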
diff --git a/planet/splice.py b/planet/splice.py
index bd20d2f..29046c5 100644
--- a/planet/splice.py
+++ b/planet/splice.py
@@ -65,7 +65,7 @@ def splice():
if not data.feed: continue
xdoc=minidom.parseString('''\n''' % planet.xmlns)
- reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
+ reconstitute.source(xdoc.documentElement, data.feed, None, None)
feed.appendChild(xdoc.documentElement)
return doc
diff --git a/runtests.py b/runtests.py
index 2ebb4cc..9fd5f70 100755
--- a/runtests.py
+++ b/runtests.py
@@ -21,6 +21,10 @@ sys.path[0] = os.getcwd()
# find all of the planet test modules
modules = map(fullmodname, glob.glob(os.path.join('tests', 'test_*.py')))
+# enable warnings
+import planet
+planet.getLogger("WARNING")
+
# load all of the tests into a suite
suite = unittest.TestLoader().loadTestsFromNames(modules)
diff --git a/tests/data/reconstitute/source_bozo.xml b/tests/data/reconstitute/source_bozo.xml
new file mode 100644
index 0000000..38a6317
--- /dev/null
+++ b/tests/data/reconstitute/source_bozo.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
diff --git a/tests/data/reconstitute/source_format.xml b/tests/data/reconstitute/source_format.xml
new file mode 100644
index 0000000..0e41171
--- /dev/null
+++ b/tests/data/reconstitute/source_format.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
diff --git a/tests/test_apply.py b/tests/test_apply.py
index e737ee0..dce69c1 100644
--- a/tests/test_apply.py
+++ b/tests/test_apply.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import unittest, os, shutil
-from planet import config, splice
+from planet import config, splice, logger
from xml.dom import minidom
workdir = 'tests/work/apply'
@@ -32,7 +32,7 @@ class ApplyTest(unittest.TestCase):
for file in ['index.html', 'default.css', 'images/foaf.png']:
path = os.path.join(workdir, file)
self.assertTrue(os.path.exists(path))
- self.assertTrue(os.stat(path).st_size > 0)
+ self.assertTrue(os.stat(path).st_size > 0, file + ' has size 0')
# verify that index.html is well formed, has content, and xml:lang
html = open(os.path.join(workdir, 'index.html'))
@@ -62,3 +62,26 @@ class ApplyTest(unittest.TestCase):
self.assertTrue(html.find('test planet
')>=0)
self.assertTrue(html.find(
'')>=0)
+
+try:
+ import libxml2
+except ImportError:
+
+ try:
+ import win32pipe
+ (stdin,stdout) = win32pipe.popen4('xsltproc -V', 't')
+ stdin.close()
+ stdout.read()
+ try:
+ exitcode = stdout.close()
+ except IOError:
+ exitcode = -1
+ except:
+ import commands
+ (exitstatus,output) = commands.getstatusoutput('xsltproc -V')
+ exitcode = ((exitstatus>>8) & 0xFF)
+
+ if exitcode:
+ logger.warn("xsltproc is not available => can't test XSLT templates")
+ for method in dir(ApplyTest):
+ if method.startswith('test_'): delattr(ApplyTest,method)
diff --git a/tests/test_filters.py b/tests/test_filters.py
index aeee9a4..f979946 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import unittest, xml.dom.minidom
-from planet import shell, config
+from planet import shell, config, logger
class FilterTests(unittest.TestCase):
@@ -80,7 +80,10 @@ try:
from subprocess import Popen, PIPE
sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
sed.communicate()
- if sed.returncode != 0: raise Exception
-except:
- # sed is not available
- del FilterTests.test_stripAd_yahoo
+ if sed.returncode != 0:
+ logger.warn("sed is not available => can't test stripAd_yahoo")
+ del FilterTests.test_stripAd_yahoo
+except ImportError:
+ logger.warn("Popen is not available => can't test filters")
+ for method in dir(FilterTests):
+ if method.startswith('test_'): delattr(FilterTests,method)
diff --git a/tests/test_foaf.py b/tests/test_foaf.py
index 29f6328..ace44c8 100644
--- a/tests/test_foaf.py
+++ b/tests/test_foaf.py
@@ -3,7 +3,7 @@
import unittest, os, shutil
from planet.foaf import foaf2config
from ConfigParser import ConfigParser
-from planet import config
+from planet import config, logger
workdir = 'tests/work/config/cache'
@@ -119,6 +119,7 @@ class FoafTest(unittest.TestCase):
try:
import RDF
except:
+ logger.warn("Redland RDF is not available => can't test FOAF reading lists")
for key in FoafTest.__dict__.keys():
if key.startswith('test_'): delattr(FoafTest, key)
diff --git a/tests/test_rlists.py b/tests/test_rlists.py
index 02285d5..5cb25ce 100644
--- a/tests/test_rlists.py
+++ b/tests/test_rlists.py
@@ -6,7 +6,7 @@ from os.path import split
from glob import glob
from ConfigParser import ConfigParser
-workdir = 'tests/work/config/cache'
+workdir = os.path.join('tests', 'work', 'config', 'cache')
class ReadingListTest(unittest.TestCase):
def setUp(self):
@@ -38,7 +38,7 @@ class ReadingListTest(unittest.TestCase):
def test_cache(self):
cache = glob(os.path.join(workdir,'lists','*'))
- self.assertTrue(1,len(cache))
+ self.assertEqual(1,len(cache))
parser = ConfigParser()
parser.read(cache[0])
diff --git a/tests/test_spider.py b/tests/test_spider.py
index 9f819d6..aa0a6c1 100644
--- a/tests/test_spider.py
+++ b/tests/test_spider.py
@@ -26,11 +26,13 @@ class SpiderTest(unittest.TestCase):
os.removedirs(os.path.split(workdir)[0])
def test_filename(self):
- self.assertEqual('./example.com,index.html',
+ self.assertEqual(os.path.join('.', 'example.com,index.html'),
filename('.', 'http://example.com/index.html'))
- self.assertEqual('./planet.intertwingly.net,2006,testfeed1,1',
+ self.assertEqual(os.path.join('.',
+ 'planet.intertwingly.net,2006,testfeed1,1'),
filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1'))
- self.assertEqual('./00000000-0000-0000-0000-000000000000',
+ self.assertEqual(os.path.join('.',
+ '00000000-0000-0000-0000-000000000000'),
filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000'))
# Requires Python 2.3
@@ -38,7 +40,7 @@ class SpiderTest(unittest.TestCase):
import encodings.idna
except:
return
- self.assertEqual('./xn--8ws00zhy3a.com',
+ self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'),
filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))
def test_spiderFeed(self):
@@ -51,8 +53,8 @@ class SpiderTest(unittest.TestCase):
self.assertEqual(5, len(files))
# verify that the file names are as expected
- self.assertTrue(workdir +
- '/planet.intertwingly.net,2006,testfeed1,1' in files)
+ self.assertTrue(os.path.join(workdir,
+ 'planet.intertwingly.net,2006,testfeed1,1') in files)
# verify that the file timestamps match atom:updated
data = feedparser.parse(files[2])
@@ -73,10 +75,10 @@ class SpiderTest(unittest.TestCase):
self.assertEqual(13, len(files))
# verify that the file names are as expected
- self.assertTrue(workdir +
- '/planet.intertwingly.net,2006,testfeed1,1' in files)
- self.assertTrue(workdir +
- '/planet.intertwingly.net,2006,testfeed2,1' in files)
+ self.assertTrue(os.path.join(workdir,
+ 'planet.intertwingly.net,2006,testfeed1,1') in files)
+ self.assertTrue(os.path.join(workdir,
+ 'planet.intertwingly.net,2006,testfeed2,1') in files)
data = feedparser.parse(workdir +
'/planet.intertwingly.net,2006,testfeed3,1')