diff --git a/filters/coral_cdn_filter.py b/filters/coral_cdn_filter.py
new file mode 100644
index 0000000..0192c63
--- /dev/null
+++ b/filters/coral_cdn_filter.py
@@ -0,0 +1,17 @@
+"""
+Remap all images to take advantage of the Coral Content Distribution
+Network.
+"""
+
+import sys, urlparse, xml.dom.minidom
+
+entry = xml.dom.minidom.parse(sys.stdin).documentElement  # document to filter arrives on stdin
+
+for node in entry.getElementsByTagName('img'):
+ if node.hasAttribute('src'):
+ component = list(urlparse.urlparse(node.getAttribute('src')))  # list() so the parts can be modified
+ if component[0]=='http' and component[1].find(':')<0:  # plain http only, and no ':' in the host part
+ component[1] += '.nyud.net:8080'  # append the Coral suffix to the host
+ node.setAttribute('src', urlparse.urlunparse(component))
+
+print entry.toxml('utf-8')  # rewritten document goes to stdout
diff --git a/planet/config.py b/planet/config.py
index 9a56353..5b3b187 100644
--- a/planet/config.py
+++ b/planet/config.py
@@ -101,6 +101,8 @@ def __init__():
define_planet_list('template_files')
define_planet_list('bill_of_materials')
define_planet_list('template_directories')
+ define_planet_list('filters')  # filter scripts the spider runs on each entry before caching it
+ define_planet_list('filter_directories')  # directories searched for those scripts
define_planet_list('reading_lists')
# template options
@@ -151,6 +153,12 @@ def load(config_file):
else:
log.error('Unable to find theme %s', theme)
+ # Filter support: also search <sys.path[0]>/filters, appended last, when it exists
+ dirs = config.filter_directories()
+ filter_dir = os.path.join(sys.path[0],'filters')
+ if filter_dir not in dirs and os.path.exists(filter_dir):
+ parser.set('Planet', 'filter_directories', ' '.join(dirs+[filter_dir]))
+
# Reading list support
reading_lists = config.reading_lists()
if reading_lists:
@@ -209,8 +217,8 @@ def feedtype():
def subscriptions():
""" list the feed subscriptions """
- return filter(lambda feed: feed!='Planet' and feed not in template_files(),
- parser.sections())
+ skip = ['Planet'] + template_files() + filters()  # non-feed sections, built once
+ return filter(lambda feed: feed not in skip, parser.sections())
def planet_options():
""" dictionary of planet wide options"""
diff --git a/planet/shell/__init__.py b/planet/shell/__init__.py
index d1fe258..4d83661 100644
--- a/planet/shell/__init__.py
+++ b/planet/shell/__init__.py
@@ -2,16 +2,21 @@ import planet
import os
import sys
-def run(template_file, doc):
+def run(template_file, doc, mode='template'):  # mode picks the search path and the processing branch
""" select a template module based on file extension and execute it """
log = planet.getLogger(planet.config.log_level())
+ if mode == 'template':
+ dirs = planet.config.template_directories()
+ else:  # any other mode looks in the filter directories
+ dirs = planet.config.filter_directories()
+
# see if the template can be located
- for template_dir in planet.config.template_directories():
+ for template_dir in dirs:
template_resolved = os.path.join(template_dir, template_file)
if os.path.exists(template_resolved): break
else:
- return log.error("Unable to locate template %s", template_file)
+ return log.error("Unable to locate %s %s", mode, template_file)  # mode doubles as the noun
# Add shell directory to the path, if not already there
shellpath = os.path.join(sys.path[0],'planet','shell')
@@ -20,16 +25,22 @@ def run(template_file, doc):
# Try loading module for processing this template, based on the extension
base,ext = os.path.splitext(os.path.basename(template_resolved))
- template_module_name = ext[1:]
+ module_name = ext[1:]
try:
- template_module = __import__(template_module_name)
+ module = __import__(module_name)
except Exception, inst:
- return log.error("Skipping template '%s' after failing to load '%s':" +
- " %s", template_resolved, template_module_name, inst)
+ # report the failure through the log only; nothing is written to stdout
+ return log.error("Skipping %s '%s' after failing to load '%s': %s",
+ mode, template_resolved, module_name, inst)
# Execute the shell module
- log.info("Processing template %s using %s", template_resolved,
- template_module_name)
- output_dir = planet.config.output_dir()
- output_file = os.path.join(output_dir, base)
- template_module.run(template_resolved, doc, output_file)
+ if mode == 'filter':
+ log.debug("Processing filter %s using %s", template_resolved,
+ module_name)
+ return module.run(template_resolved, doc, None)
+ else:
+ log.info("Processing template %s using %s", template_resolved,
+ module_name)
+ output_dir = planet.config.output_dir()
+ output_file = os.path.join(output_dir, base)
+ module.run(template_resolved, doc, output_file)
diff --git a/planet/shell/py.py b/planet/shell/py.py
new file mode 100644
index 0000000..cb233fb
--- /dev/null
+++ b/planet/shell/py.py
@@ -0,0 +1,16 @@
+from subprocess import Popen, PIPE
+
+def run(script, doc, output_file=None):
+    """ run a Python script with doc on its stdin; return its stdout """
+    # when output_file is given the script writes there and None is returned
+    if output_file:
+        out = open(output_file, 'w')
+    else:
+        out = PIPE
+    try:
+        proc = Popen(['python', script], stdin=PIPE, stdout=out, stderr=PIPE)
+        stdout, stderr = proc.communicate(doc)
+    finally:
+        if output_file: out.close()  # don't leak the handle opened above
+    if stderr: print stderr  # surface the script's diagnostics
+    return stdout
diff --git a/planet/spider.py b/planet/spider.py
index e39b241..ec05e27 100644
--- a/planet/spider.py
+++ b/planet/spider.py
@@ -7,7 +7,7 @@ and write each as a set of entries in a cache directory.
import time, calendar, re, os
from xml.dom import minidom
# Planet modules
-import planet, config, feedparser, reconstitute
+import planet, config, feedparser, reconstitute, shell
# Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
@@ -39,9 +39,8 @@ def filename(directory, filename):
def write(xdoc, out):
""" write the document out to disk """
file = open(out,'w')
- file.write(xdoc.toxml('utf-8'))
+ file.write(xdoc)  # xdoc is already-serialized text here; callers pass toxml()/filter output
file.close()
- xdoc.unlink()
def spiderFeed(feed):
""" Spider (fetch) a single feed """
@@ -116,30 +115,43 @@ def spiderFeed(feed):
xdoc=minidom.parseString('''\n''' % planet.xmlns)
reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
- write(xdoc, filename(sources, feed))
+ write(xdoc.toxml('utf-8'), filename(sources, feed))
+ xdoc.unlink()
# write each entry to the cache
cache = config.cache_directory()
for entry in data.entries:
+
+ # generate an id, if none is present
if not entry.has_key('id') or not entry.id:
entry['id'] = reconstitute.id(None, entry)
if not entry['id']: continue
- out = filename(cache, entry.id)
+ # compute cache file name based on the id
+ cache_file = filename(cache, entry.id)
+ # get updated-date either from the entry or the cache (default to now)
mtime = None
if entry.has_key('updated_parsed'):
mtime = calendar.timegm(entry.updated_parsed)
if mtime > time.time(): mtime = None
if not mtime:
try:
- mtime = os.stat(out).st_mtime
+ mtime = os.stat(cache_file).st_mtime
except:
mtime = time.time()
entry['updated_parsed'] = time.gmtime(mtime)
- write(reconstitute.reconstitute(data, entry), out)
- os.utime(out, (mtime, mtime))
+ # apply any filters; a filter that could not be run leaves the entry as it was
+ xdoc = reconstitute.reconstitute(data, entry)
+ output = xdoc.toxml('utf-8')
+ xdoc.unlink()
+ for filter in config.filters():
+ filtered = shell.run(filter, output, mode="filter")
+ if filtered is not None: output = filtered  # None: filter not located/loaded
+ # write out and timestamp the results
+ write(output, cache_file)
+ os.utime(cache_file, (mtime, mtime))
def spiderPlanet(configFile):
""" Spider (fetch) an entire planet """
diff --git a/tests/data/filter/coral_cdn.xml b/tests/data/filter/coral_cdn.xml
new file mode 100644
index 0000000..3c45248
--- /dev/null
+++ b/tests/data/filter/coral_cdn.xml
@@ -0,0 +1,7 @@
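+<entry xmlns="http://www.w3.org/2005/Atom">
+  <content>
+    <div xmlns="http://www.w3.org/1999/xhtml">
+      <img src="http://example.com/foo.png"/>
+    </div>
+  </content>
+</entry>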
diff --git a/tests/test_filters.py b/tests/test_filters.py
new file mode 100644
index 0000000..5dc2938
--- /dev/null
+++ b/tests/test_filters.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+
+import unittest, xml.dom.minidom
+from planet import shell
+
+testfile = 'tests/data/filter/coral_cdn.xml'
+filter = 'coral_cdn_filter.py'
+
+class FilterTests(unittest.TestCase):
+ """ run filter scripts through planet.shell and check what they emit """
+ def test_coral_cdn(self):
+ output = shell.run(filter, open(testfile).read(), mode="filter")  # filter/testfile: module-level names above
+ dom = xml.dom.minidom.parseString(output)
+ imgsrc = dom.getElementsByTagName('img')[0].getAttribute('src')
+ self.assertEqual('http://example.com.nyud.net:8080/foo.png', imgsrc)  # host gains the Coral suffix, path unchanged