Initial filter support (no parameters)
This commit is contained in:
parent
4b23c2f967
commit
9e80c7e77f
17
filters/coral_cdn_filter.py
Normal file
17
filters/coral_cdn_filter.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
"""
|
||||||
|
Remap all images to take advantage of the Coral Content Distribution
|
||||||
|
Network <http://www.coralcdn.org/>.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys, urlparse, xml.dom.minidom
|
||||||
|
|
||||||
|
# Parse the entry document supplied on standard input (Python 2 script).
entry = xml.dom.minidom.parse(sys.stdin).documentElement

# Point every plain-http image at the Coral CDN by suffixing its host name.
for img in entry.getElementsByTagName('img'):
    if not img.hasAttribute('src'):
        continue

    scheme, netloc, path, params, query, fragment = \
        urlparse.urlparse(img.getAttribute('src'))

    # Leave non-http URLs alone, as well as any network location that
    # already contains a ':' (e.g. an explicit port).
    if scheme != 'http' or ':' in netloc:
        continue

    coral_parts = (scheme, netloc + '.nyud.net:8080',
                   path, params, query, fragment)
    img.setAttribute('src', urlparse.urlunparse(coral_parts))

# Emit the (possibly rewritten) entry as UTF-8, followed by a newline.
sys.stdout.write(entry.toxml('utf-8') + '\n')
|
@ -101,6 +101,8 @@ def __init__():
|
|||||||
define_planet_list('template_files')
|
define_planet_list('template_files')
|
||||||
define_planet_list('bill_of_materials')
|
define_planet_list('bill_of_materials')
|
||||||
define_planet_list('template_directories')
|
define_planet_list('template_directories')
|
||||||
|
define_planet_list('filters')
|
||||||
|
define_planet_list('filter_directories')
|
||||||
define_planet_list('reading_lists')
|
define_planet_list('reading_lists')
|
||||||
|
|
||||||
# template options
|
# template options
|
||||||
@ -151,6 +153,12 @@ def load(config_file):
|
|||||||
else:
|
else:
|
||||||
log.error('Unable to find theme %s', theme)
|
log.error('Unable to find theme %s', theme)
|
||||||
|
|
||||||
|
# Filter support
|
||||||
|
dirs = config.filter_directories()
|
||||||
|
filter_dir = os.path.join(sys.path[0],'filters')
|
||||||
|
if filter_dir not in dirs and os.path.exists(filter_dir):
|
||||||
|
parser.set('Planet', 'filter_directories', ' '.join(dirs+[filter_dir]))
|
||||||
|
|
||||||
# Reading list support
|
# Reading list support
|
||||||
reading_lists = config.reading_lists()
|
reading_lists = config.reading_lists()
|
||||||
if reading_lists:
|
if reading_lists:
|
||||||
@ -209,8 +217,8 @@ def feedtype():
|
|||||||
|
|
||||||
def subscriptions():
|
def subscriptions():
|
||||||
""" list the feed subscriptions """
|
""" list the feed subscriptions """
|
||||||
return filter(lambda feed: feed!='Planet' and feed not in template_files(),
|
return filter(lambda feed: feed!='Planet' and
|
||||||
parser.sections())
|
feed not in template_files()+filters(), parser.sections())
|
||||||
|
|
||||||
def planet_options():
|
def planet_options():
|
||||||
""" dictionary of planet wide options"""
|
""" dictionary of planet wide options"""
|
||||||
|
@ -2,16 +2,21 @@ import planet
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
def run(template_file, doc):
|
def run(template_file, doc, mode='template'):
|
||||||
""" select a template module based on file extension and execute it """
|
""" select a template module based on file extension and execute it """
|
||||||
log = planet.getLogger(planet.config.log_level())
|
log = planet.getLogger(planet.config.log_level())
|
||||||
|
|
||||||
|
if mode == 'template':
|
||||||
|
dirs = planet.config.template_directories()
|
||||||
|
else:
|
||||||
|
dirs = planet.config.filter_directories()
|
||||||
|
|
||||||
# see if the template can be located
|
# see if the template can be located
|
||||||
for template_dir in planet.config.template_directories():
|
for template_dir in dirs:
|
||||||
template_resolved = os.path.join(template_dir, template_file)
|
template_resolved = os.path.join(template_dir, template_file)
|
||||||
if os.path.exists(template_resolved): break
|
if os.path.exists(template_resolved): break
|
||||||
else:
|
else:
|
||||||
return log.error("Unable to locate template %s", template_file)
|
return log.error("Unable to locate %s %s", mode, template_file)
|
||||||
|
|
||||||
# Add shell directory to the path, if not already there
|
# Add shell directory to the path, if not already there
|
||||||
shellpath = os.path.join(sys.path[0],'planet','shell')
|
shellpath = os.path.join(sys.path[0],'planet','shell')
|
||||||
@ -20,16 +25,22 @@ def run(template_file, doc):
|
|||||||
|
|
||||||
# Try loading module for processing this template, based on the extension
|
# Try loading module for processing this template, based on the extension
|
||||||
base,ext = os.path.splitext(os.path.basename(template_resolved))
|
base,ext = os.path.splitext(os.path.basename(template_resolved))
|
||||||
template_module_name = ext[1:]
|
module_name = ext[1:]
|
||||||
try:
|
try:
|
||||||
template_module = __import__(template_module_name)
|
module = __import__(module_name)
|
||||||
except Exception, inst:
|
except Exception, inst:
|
||||||
return log.error("Skipping template '%s' after failing to load '%s':" +
|
print module_name
|
||||||
" %s", template_resolved, template_module_name, inst)
|
return log.error("Skipping %s '%s' after failing to load '%s': %s",
|
||||||
|
mode, template_resolved, module_name, inst)
|
||||||
|
|
||||||
# Execute the shell module
|
# Execute the shell module
|
||||||
log.info("Processing template %s using %s", template_resolved,
|
if mode == 'filter':
|
||||||
template_module_name)
|
log.debug("Processing filer %s using %s", template_resolved,
|
||||||
output_dir = planet.config.output_dir()
|
module_name)
|
||||||
output_file = os.path.join(output_dir, base)
|
return module.run(template_resolved, doc, None)
|
||||||
template_module.run(template_resolved, doc, output_file)
|
else:
|
||||||
|
log.info("Processing template %s using %s", template_resolved,
|
||||||
|
module_name)
|
||||||
|
output_dir = planet.config.output_dir()
|
||||||
|
output_file = os.path.join(output_dir, base)
|
||||||
|
module.run(template_resolved, doc, output_file)
|
||||||
|
16
planet/shell/py.py
Normal file
16
planet/shell/py.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from subprocess import Popen, PIPE
|
||||||
|
|
||||||
|
def run(script, doc, output_file=None):
    """ process a Python script

    Runs `script` in a child Python interpreter, writing `doc` to the
    child's standard input.

    script      -- path of the script to execute
    doc         -- data fed to the child's stdin
    output_file -- optional path; when given, the child's stdout is written
                   to that file instead of being captured

    Returns the child's captured stdout, or None when output_file was given
    (the output went to the file instead).  Anything the child writes to
    its stderr is echoed on this process's stdout.
    """
    # Local import: the module header only pulls names from subprocess.
    import sys

    if output_file:
        out = open(output_file, 'w')
    else:
        out = PIPE

    try:
        # Prefer the interpreter that is running us so the script sees the
        # same Python; fall back to whatever 'python' is on the PATH.
        proc = Popen([sys.executable or 'python', script],
                     stdin=PIPE, stdout=out, stderr=PIPE)
        stdout, stderr = proc.communicate(doc)
    finally:
        # Always release the output file, even if the child failed to start.
        if output_file:
            out.close()

    if stderr:
        # communicate() yields bytes on interpreters where str is unicode.
        if not isinstance(stderr, str):
            stderr = stderr.decode('utf-8', 'replace')
        sys.stdout.write(stderr + '\n')

    return stdout
|
@ -7,7 +7,7 @@ and write each as a set of entries in a cache directory.
|
|||||||
import time, calendar, re, os
|
import time, calendar, re, os
|
||||||
from xml.dom import minidom
|
from xml.dom import minidom
|
||||||
# Planet modules
|
# Planet modules
|
||||||
import planet, config, feedparser, reconstitute
|
import planet, config, feedparser, reconstitute, shell
|
||||||
|
|
||||||
# Regular expressions to sanitise cache filenames
|
# Regular expressions to sanitise cache filenames
|
||||||
re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
|
re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
|
||||||
@ -39,9 +39,8 @@ def filename(directory, filename):
|
|||||||
def write(xdoc, out):
|
def write(xdoc, out):
|
||||||
""" write the document out to disk """
|
""" write the document out to disk """
|
||||||
file = open(out,'w')
|
file = open(out,'w')
|
||||||
file.write(xdoc.toxml('utf-8'))
|
file.write(xdoc)
|
||||||
file.close()
|
file.close()
|
||||||
xdoc.unlink()
|
|
||||||
|
|
||||||
def spiderFeed(feed):
|
def spiderFeed(feed):
|
||||||
""" Spider (fetch) a single feed """
|
""" Spider (fetch) a single feed """
|
||||||
@ -116,30 +115,43 @@ def spiderFeed(feed):
|
|||||||
xdoc=minidom.parseString('''<feed xmlns:planet="%s"
|
xdoc=minidom.parseString('''<feed xmlns:planet="%s"
|
||||||
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
|
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
|
||||||
reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
|
reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
|
||||||
write(xdoc, filename(sources, feed))
|
write(xdoc.toxml('utf-8'), filename(sources, feed))
|
||||||
|
xdoc.unlink()
|
||||||
|
|
||||||
# write each entry to the cache
|
# write each entry to the cache
|
||||||
cache = config.cache_directory()
|
cache = config.cache_directory()
|
||||||
for entry in data.entries:
|
for entry in data.entries:
|
||||||
|
|
||||||
|
# generate an id, if none is present
|
||||||
if not entry.has_key('id') or not entry.id:
|
if not entry.has_key('id') or not entry.id:
|
||||||
entry['id'] = reconstitute.id(None, entry)
|
entry['id'] = reconstitute.id(None, entry)
|
||||||
if not entry['id']: continue
|
if not entry['id']: continue
|
||||||
|
|
||||||
out = filename(cache, entry.id)
|
# compute cache file name based on the id
|
||||||
|
cache_file = filename(cache, entry.id)
|
||||||
|
|
||||||
|
# get updated-date either from the entry or the cache (default to now)
|
||||||
mtime = None
|
mtime = None
|
||||||
if entry.has_key('updated_parsed'):
|
if entry.has_key('updated_parsed'):
|
||||||
mtime = calendar.timegm(entry.updated_parsed)
|
mtime = calendar.timegm(entry.updated_parsed)
|
||||||
if mtime > time.time(): mtime = None
|
if mtime > time.time(): mtime = None
|
||||||
if not mtime:
|
if not mtime:
|
||||||
try:
|
try:
|
||||||
mtime = os.stat(out).st_mtime
|
mtime = os.stat(cache_file).st_mtime
|
||||||
except:
|
except:
|
||||||
mtime = time.time()
|
mtime = time.time()
|
||||||
entry['updated_parsed'] = time.gmtime(mtime)
|
entry['updated_parsed'] = time.gmtime(mtime)
|
||||||
|
|
||||||
write(reconstitute.reconstitute(data, entry), out)
|
# apply any filters
|
||||||
os.utime(out, (mtime, mtime))
|
xdoc = reconstitute.reconstitute(data, entry)
|
||||||
|
output = xdoc.toxml('utf-8')
|
||||||
|
xdoc.unlink()
|
||||||
|
for filter in config.filters():
|
||||||
|
output = shell.run(filter, output, mode="filter")
|
||||||
|
|
||||||
|
# write out and timestamp the results
|
||||||
|
write(output, cache_file)
|
||||||
|
os.utime(cache_file, (mtime, mtime))
|
||||||
|
|
||||||
def spiderPlanet(configFile):
|
def spiderPlanet(configFile):
|
||||||
""" Spider (fetch) an entire planet """
|
""" Spider (fetch) an entire planet """
|
||||||
|
7
tests/data/filter/coral_cdn.xml
Normal file
7
tests/data/filter/coral_cdn.xml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<entry xmlns="http://www.w3.org/2005/xhtml">
|
||||||
|
<content>
|
||||||
|
<div xmlns="http://www.w3.org/1999/xhtml">
|
||||||
|
<img src="http://example.com/foo.png"/>
|
||||||
|
</div>
|
||||||
|
</content>
|
||||||
|
</entry>
|
15
tests/test_filters.py
Normal file
15
tests/test_filters.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import unittest, xml.dom.minidom
|
||||||
|
from planet import shell
|
||||||
|
|
||||||
|
# Fixture entry and the filter script it is piped through.
testfile = 'tests/data/filter/coral_cdn.xml'
filter = 'coral_cdn_filter.py'

class FilterTests(unittest.TestCase):

    def test_coral_cdn(self):
        # Run the fixture through the filter via the shell dispatcher.
        source = open(testfile).read()
        filtered = shell.run(filter, source, mode="filter")

        # The first <img> must now point at the Coral CDN host.
        images = xml.dom.minidom.parseString(filtered) \
            .getElementsByTagName('img')
        self.assertEqual('http://example.com.nyud.net:8080/foo.png',
            images[0].getAttribute('src'))
|
Loading…
x
Reference in New Issue
Block a user