Change planet:subscription to planet:source, and expand the information
provided in each.
This commit is contained in:
parent
6c0e24fd00
commit
9fa9fb6117
@ -19,13 +19,15 @@
|
||||
|
||||
<h2>Subscriptions</h2>
|
||||
<ul>
|
||||
<xsl:for-each select="planet:subscription">
|
||||
<xsl:for-each select="planet:source">
|
||||
<xsl:sort select="planet:name"/>
|
||||
<li>
|
||||
<a href="{atom:link[@rel='self']/@href}" title="subscribe">
|
||||
<img src="images/feed-icon-10x10.png" alt="(feed)"/>
|
||||
</a>
|
||||
<xsl:value-of select="planet:name"/>
|
||||
<a href="{atom:link[@rel='alternate']/@href}">
|
||||
<xsl:value-of select="planet:name"/>
|
||||
</a>
|
||||
</li>
|
||||
</xsl:for-each>
|
||||
</ul>
|
||||
|
@ -1,3 +1,5 @@
|
||||
xmlns = 'http://planet.intertwingly.net/'
|
||||
|
||||
logger = None
|
||||
|
||||
def getLogger(level):
|
||||
|
@ -26,7 +26,7 @@ Todo:
|
||||
* error handling (example: no planet section)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os, sys
|
||||
from ConfigParser import ConfigParser
|
||||
|
||||
parser = ConfigParser()
|
||||
@ -83,6 +83,12 @@ def template_files():
|
||||
""" list the templates defined """
|
||||
return parser.get('Planet','template_files').split(' ')
|
||||
|
||||
def cache_sources_directory():
    """ directory in which per-feed source information is cached

    Returns the value of the 'cache_sources_directory' option of the
    [Planet] section when that option is present; otherwise returns the
    'sources' subdirectory of cache_directory().
    """
    if parser.has_option('Planet', 'cache_sources_directory'):
        # the configured value must be returned, not merely looked up,
        # otherwise callers receive None whenever the option is set
        return parser.get('Planet', 'cache_sources_directory')
    return os.path.join(cache_directory(), 'sources')
|
||||
|
||||
def feeds():
|
||||
""" list the feeds defined """
|
||||
return filter(lambda feed: feed!='Planet' and feed not in template_files(),
|
||||
|
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
|
||||
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
||||
"""
|
||||
|
||||
__version__ = "4.2-pre-" + "$Revision: 1.131 $"[11:16] + "-cvs"
|
||||
__version__ = "4.2-pre-" + "$Revision: 1.132 $"[11:16] + "-cvs"
|
||||
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
@ -2379,12 +2379,16 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
|
||||
_BaseHTMLProcessor.handle_data(self, text)
|
||||
|
||||
def sanitize_style(self, style):
|
||||
# disallow urls
|
||||
style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)
|
||||
|
||||
# gauntlet
|
||||
if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
|
||||
if not re.match("^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style): return ''
|
||||
|
||||
clean = []
|
||||
for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
|
||||
if not value: continue
|
||||
if prop.lower() in self.acceptable_css_properties:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
elif prop.split('-')[0].lower() in ['background','border','margin','padding']:
|
||||
|
@ -18,6 +18,7 @@ from xml.sax.saxutils import escape
|
||||
from xml.dom import minidom
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from xml.parsers.expat import ExpatError
|
||||
import planet
|
||||
|
||||
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
|
||||
|
||||
@ -141,10 +142,9 @@ def content(xentry, name, detail, bozo):
|
||||
|
||||
xentry.appendChild(xcontent)
|
||||
|
||||
def source(xentry, source, bozo):
|
||||
def source(xsource, source, bozo):
|
||||
""" copy source information to the entry """
|
||||
xdoc = xentry.ownerDocument
|
||||
xsource = xdoc.createElement('source')
|
||||
xdoc = xsource.ownerDocument
|
||||
|
||||
createTextElement(xsource, 'id', source.get('id', None))
|
||||
createTextElement(xsource, 'icon', source.get('icon', None))
|
||||
@ -164,16 +164,14 @@ def source(xentry, source, bozo):
|
||||
|
||||
# propagate planet inserted information
|
||||
for key, value in source.items():
|
||||
if key.startswith('planet:'):
|
||||
createTextElement(xsource, key, value)
|
||||
|
||||
xentry.appendChild(xsource)
|
||||
if key.startswith('planet_'):
|
||||
createTextElement(xsource, key.replace('_',':',1), value)
|
||||
|
||||
def reconstitute(feed, entry):
|
||||
""" create an entry document from a parsed feed """
|
||||
xdoc=minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
|
||||
xentry=xdoc.documentElement
|
||||
xentry.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
|
||||
xentry.setAttribute('xmlns:planet',planet.xmlns)
|
||||
|
||||
id(xentry, entry)
|
||||
links(xentry, entry)
|
||||
@ -191,6 +189,8 @@ def reconstitute(feed, entry):
|
||||
for contributor in entry.get('contributors',[]):
|
||||
author(xentry, 'contributor', contributor)
|
||||
|
||||
source(xentry, entry.get('source', feed.feed), bozo)
|
||||
xsource = xdoc.createElement('source')
|
||||
source(xsource, entry.get('source', feed.feed), bozo)
|
||||
xentry.appendChild(xsource)
|
||||
|
||||
return xdoc
|
||||
|
@ -5,8 +5,9 @@ and write each as a set of entries in a cache directory.
|
||||
|
||||
# Standard library modules
|
||||
import time, calendar, re, os
|
||||
from xml.dom import minidom
|
||||
# Planet modules
|
||||
import config, feedparser, reconstitute
|
||||
import planet, config, feedparser, reconstitute
|
||||
|
||||
try:
|
||||
from xml.dom.ext import PrettyPrint
|
||||
@ -40,15 +41,45 @@ def filename(directory, filename):
|
||||
|
||||
return os.path.join(directory, filename)
|
||||
|
||||
def write(xdoc, out):
    """ write the document out to disk

    xdoc: DOM document to serialize; it is unlinked after a successful write
    out:  path of the file to create or overwrite
    """
    output = open(out,'w')
    try:
        try:
            PrettyPrint(xdoc, output)
        except Exception:
            # known reasons for failure include no pretty printer installed,
            # and absurdly high levels of markup nesting causing Python to
            # declare infinite recursion.
            output.seek(0)
            # drop anything the pretty printer emitted before failing, so no
            # stale bytes trail the (possibly shorter) plain serialization
            output.truncate()
            output.write(xdoc.toxml('utf-8'))
    finally:
        # release the handle even when the fallback write itself fails
        output.close()
    xdoc.unlink()
|
||||
|
||||
def spiderFeed(feed):
|
||||
""" Spider (fetch) a single feed """
|
||||
data = feedparser.parse(feed)
|
||||
cache = config.cache_directory()
|
||||
if not data.feed: return
|
||||
|
||||
# capture data from the planet configuration file
|
||||
# capture feed and data from the planet configuration file
|
||||
if not data.feed.has_key('links'): data.feed['links'] = list()
|
||||
for link in data.feed.links:
|
||||
if link.rel == 'self': break
|
||||
else:
|
||||
data.feed.links.append(feedparser.FeedParserDict(
|
||||
{'rel':'self', 'type':'application/atom+xml', 'href':feed}))
|
||||
for name, value in config.feed_options(feed).items():
|
||||
data.feed['planet:'+name] = value
|
||||
data.feed['planet_'+name] = value
|
||||
|
||||
# write the feed info to the cache
|
||||
sources = config.cache_sources_directory()
|
||||
if not os.path.exists(sources): os.makedirs(sources)
|
||||
xdoc=minidom.parseString('''<feed xmlns:planet="%s"
|
||||
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
|
||||
reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
|
||||
write(xdoc, filename(sources, feed))
|
||||
|
||||
# write each entry to the cache
|
||||
cache = config.cache_directory()
|
||||
for entry in data.entries:
|
||||
if not entry.has_key('id'):
|
||||
entry['id'] = reconstitute.id(None, entry)
|
||||
@ -65,24 +96,11 @@ def spiderFeed(feed):
|
||||
mtime = time.time()
|
||||
entry['updated_parsed'] = time.gmtime(mtime)
|
||||
|
||||
xml = reconstitute.reconstitute(data, entry)
|
||||
|
||||
file = open(out,'w')
|
||||
try:
|
||||
PrettyPrint(reconstitute.reconstitute(data, entry), file)
|
||||
except:
|
||||
# known reasons for failure include no pretty printer installed,
|
||||
# and absurdly high levels of markup nesting causing Python to
|
||||
# declare infinite recursion.
|
||||
file.seek(0)
|
||||
file.write(reconstitute.reconstitute(data, entry).toxml('utf-8'))
|
||||
file.close()
|
||||
|
||||
write(reconstitute.reconstitute(data, entry), out)
|
||||
os.utime(out, (mtime, mtime))
|
||||
|
||||
def spiderPlanet(configFile):
|
||||
""" Spider (fetch) an entire planet """
|
||||
import planet
|
||||
config.load(configFile)
|
||||
log = planet.getLogger(config.log_level())
|
||||
planet.setTimeout(config.feed_timeout())
|
||||
|
@ -1,8 +1,9 @@
|
||||
""" Splice together a planet from a cache of feed entries """
|
||||
import glob, os
|
||||
from xml.dom import minidom
|
||||
import config
|
||||
import planet, config, feedparser, reconstitute
|
||||
from reconstitute import createTextElement
|
||||
from spider import filename
|
||||
|
||||
def splice(configFile):
|
||||
""" Splice together a planet from a cache of entries """
|
||||
@ -11,7 +12,8 @@ def splice(configFile):
|
||||
log = planet.getLogger(config.log_level())
|
||||
|
||||
cache = config.cache_directory()
|
||||
dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")]
|
||||
dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")
|
||||
if not os.path.isdir(file)]
|
||||
dir.sort()
|
||||
dir.reverse()
|
||||
|
||||
@ -34,17 +36,14 @@ def splice(configFile):
|
||||
feed.appendChild(entry.documentElement)
|
||||
|
||||
# insert subscription information
|
||||
feed.setAttribute('xmlns:planet','http://planet.intertwingly.net/')
|
||||
feed.setAttribute('xmlns:planet',planet.xmlns)
|
||||
sources = config.cache_sources_directory()
|
||||
for sub in config.feeds():
|
||||
name = config.feed_options(sub).get('name','')
|
||||
xsub = doc.createElement('planet:subscription')
|
||||
xlink = doc.createElement('link')
|
||||
xlink.setAttribute('rel','self')
|
||||
xlink.setAttribute('href',sub.decode('utf-8'))
|
||||
xsub.appendChild(xlink)
|
||||
xname = doc.createElement('planet:name')
|
||||
xname.appendChild(doc.createTextNode(name.decode('utf-8')))
|
||||
xsub.appendChild(xname)
|
||||
feed.appendChild(xsub)
|
||||
data=feedparser.parse(filename(sources,sub))
|
||||
if not data.feed: continue
|
||||
xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
|
||||
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
|
||||
reconstitute.source(xdoc.documentElement, data.feed, data.bozo)
|
||||
feed.appendChild(xdoc.documentElement)
|
||||
|
||||
return doc
|
||||
|
1
spider.py
Normal file → Executable file
1
spider.py
Normal file → Executable file
@ -1,3 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Main program to run just the spider portion of planet
|
||||
"""
|
||||
|
1
splice.py
Normal file → Executable file
1
splice.py
Normal file → Executable file
@ -1,3 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Main program to run just the splice portion of planet
|
||||
"""
|
||||
|
@ -2,6 +2,9 @@
|
||||
cache_directory = tests/work/spider/cache
|
||||
template_files =
|
||||
|
||||
[tests/data/spider/testfeed0.atom]
|
||||
name = not found
|
||||
|
||||
[tests/data/spider/testfeed1b.atom]
|
||||
name = one
|
||||
|
||||
|
15
tests/data/splice/cache/example.com,3
vendored
Normal file
15
tests/data/splice/cache/example.com,3
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>http://example.com/3</id>
|
||||
<link href='http://example.com/3' type='text/html' rel='alternate'/>
|
||||
<title>Earth</title>
|
||||
<summary>the Blue Planet</summary>
|
||||
<updated>2006-01-03T00:00:00Z</updated>
|
||||
<source>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
|
||||
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<planet:name>three</planet:name>
|
||||
</source>
|
||||
</entry>
|
15
tests/data/splice/cache/example.com,4
vendored
Normal file
15
tests/data/splice/cache/example.com,4
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>http://example.com/4</id>
|
||||
<link href='http://example.com/4' type='text/html' rel='alternate'/>
|
||||
<title>Mars</title>
|
||||
<summary>the Red Planet</summary>
|
||||
<updated>2006-08-18T18:30:50Z</updated>
|
||||
<source>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
|
||||
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<planet:name>three</planet:name>
|
||||
</source>
|
||||
</entry>
|
15
tests/data/splice/cache/planet.intertwingly.net,2006,testfeed3,1
vendored
Normal file
15
tests/data/splice/cache/planet.intertwingly.net,2006,testfeed3,1
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed3/1</id>
|
||||
<link href='http://example.com/1' type='text/html' rel='alternate'/>
|
||||
<title>Mercury</title>
|
||||
<summary>Messenger of the Roman Gods</summary>
|
||||
<updated>2006-01-01T00:00:00Z</updated>
|
||||
<source>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
|
||||
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<planet:name>three</planet:name>
|
||||
</source>
|
||||
</entry>
|
15
tests/data/splice/cache/planet.intertwingly.net,2006,testfeed3,2
vendored
Normal file
15
tests/data/splice/cache/planet.intertwingly.net,2006,testfeed3,2
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed3/2</id>
|
||||
<link href='http://example.com/2' type='text/html' rel='alternate'/>
|
||||
<title>Venus</title>
|
||||
<summary>the Morning Star</summary>
|
||||
<updated>2006-08-18T18:30:50Z</updated>
|
||||
<source>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
|
||||
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<planet:name>three</planet:name>
|
||||
</source>
|
||||
</entry>
|
15
tests/data/splice/cache/sources/tests,data,spider,testfeed1b.atom
vendored
Normal file
15
tests/data/splice/cache/sources/tests,data,spider,testfeed1b.atom
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>one</planet:name>
|
||||
</feed>
|
15
tests/data/splice/cache/sources/tests,data,spider,testfeed2.atom
vendored
Normal file
15
tests/data/splice/cache/sources/tests,data,spider,testfeed2.atom
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
|
||||
<author>
|
||||
<name>Sam Ruby</name>
|
||||
<email>rubys@intertwingly.net</email>
|
||||
<uri>http://www.intertwingly.net/blog/</uri>
|
||||
</author>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
|
||||
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<updated>2006-06-17T00:15:18Z</updated>
|
||||
<planet:name>two</planet:name>
|
||||
</feed>
|
8
tests/data/splice/cache/sources/tests,data,spider,testfeed3.rss
vendored
Normal file
8
tests/data/splice/cache/sources/tests,data,spider,testfeed3.rss
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
|
||||
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
|
||||
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
|
||||
<subtitle>It’s just data</subtitle>
|
||||
<title>Sam Ruby</title>
|
||||
<planet:name>three</planet:name>
|
||||
</feed>
|
@ -3,9 +3,14 @@ name = test planet
|
||||
cache_directory = tests/data/splice/cache
|
||||
template_files =
|
||||
|
||||
[tests/data/spider/testfeed0.atom]
|
||||
name = not found
|
||||
|
||||
[tests/data/spider/testfeed1b.atom]
|
||||
name = one
|
||||
|
||||
[tests/data/spider/testfeed2.atom]
|
||||
name = two
|
||||
|
||||
[tests/data/spider/testfeed3.rss]
|
||||
name = three
|
||||
|
@ -17,6 +17,10 @@ class SpiderTest(unittest.TestCase):
|
||||
os.makedirs(workdir)
|
||||
|
||||
def tearDown(self):
|
||||
for file in glob.glob(workdir+"/sources/*"):
|
||||
os.unlink(file)
|
||||
if os.path.exists(workdir+"/sources"):
|
||||
os.rmdir(workdir+"/sources")
|
||||
for file in glob.glob(workdir+"/*"):
|
||||
os.unlink(file)
|
||||
os.removedirs(workdir)
|
||||
@ -36,8 +40,8 @@ class SpiderTest(unittest.TestCase):
|
||||
spiderFeed(testfeed % '1b')
|
||||
files = glob.glob(workdir+"/*")
|
||||
|
||||
# verify that exactly four files were produced
|
||||
self.assertEqual(4, len(files))
|
||||
# verify that exactly four files + one sources dir were produced
|
||||
self.assertEqual(5, len(files))
|
||||
|
||||
# verify that the file names are as expected
|
||||
self.assertTrue(workdir +
|
||||
@ -45,6 +49,7 @@ class SpiderTest(unittest.TestCase):
|
||||
|
||||
# verify that the file timestamps match atom:updated
|
||||
for file in files:
|
||||
if file.endswith('/sources'): continue
|
||||
data = feedparser.parse(file)
|
||||
self.assertTrue(data.entries[0].source.planet_name)
|
||||
self.assertEqual(os.stat(file).st_mtime,
|
||||
@ -58,8 +63,8 @@ class SpiderTest(unittest.TestCase):
|
||||
spiderPlanet(configfile)
|
||||
files = glob.glob(workdir+"/*")
|
||||
|
||||
# verify that exactly eight files were produced
|
||||
self.assertEqual(12, len(files))
|
||||
# verify that exactly twelve files + 1 source dir were produced
|
||||
self.assertEqual(13, len(files))
|
||||
|
||||
# verify that the file names are as expected
|
||||
self.assertTrue(workdir +
|
||||
|
@ -9,9 +9,9 @@ class SpliceTest(unittest.TestCase):
|
||||
|
||||
def test_splice(self):
|
||||
doc = splice(configfile)
|
||||
self.assertEqual(8,len(doc.getElementsByTagName('entry')))
|
||||
self.assertEqual(2,len(doc.getElementsByTagName('planet:subscription')))
|
||||
self.assertEqual(10,len(doc.getElementsByTagName('planet:name')))
|
||||
self.assertEqual(12,len(doc.getElementsByTagName('entry')))
|
||||
self.assertEqual(3,len(doc.getElementsByTagName('planet:source')))
|
||||
self.assertEqual(15,len(doc.getElementsByTagName('planet:name')))
|
||||
|
||||
self.assertEqual('test planet',
|
||||
doc.getElementsByTagName('title')[0].firstChild.nodeValue)
|
||||
|
Loading…
x
Reference in New Issue
Block a user