Reading lists

This commit is contained in:
Sam Ruby 2006-08-21 17:44:08 -04:00
parent 7cbc2e31bf
commit 87f99e8a01
24 changed files with 472 additions and 516 deletions

View File

@ -79,6 +79,7 @@ def __init__():
define_planet_list('template_files')
define_planet_list('bill_of_materials')
define_planet_list('template_directories')
define_planet_list('reading_lists')
# template options
define_tmpl_int('days_per_page', 0)
@ -87,45 +88,77 @@ def __init__():
def load(config_file):
""" initialize and load a configuration"""
# NOTE(review): this listing has lost its indentation and appears to
# interleave the removed and added lines of a diff: `theme` is assigned from
# both parser.get() and config.output_theme(), the import/log setup occurs
# twice, and the `dirs +=` comprehension has two alternative tails. Confirm
# which lines are live against the real file before relying on their order.
import config, planet
log = planet.getLogger(config.log_level())
# rebinds the module-level parser; the cache_*_directory() accessors in this
# listing read their options from it
global parser
parser = ConfigParser()
parser.read(config_file)
if parser.has_option('Planet', 'output_theme'):
theme = parser.get('Planet', 'output_theme')
# Theme support
theme = config.output_theme()
if theme:
# look for <theme>/config.ini relative to the current directory first, then
# under the 'themes' directory inside sys.path[0]
for path in ("", os.path.join(sys.path[0],'themes')):
theme_dir = os.path.join(path,theme)
theme_file = os.path.join(theme_dir,'config.ini')
if os.path.exists(theme_file):
# initial search list for theme directories
dirs = [theme_dir]
if parser.has_option('Planet', 'template_directories'):
dirs.insert(0,parser.get('Planet', 'template_directories'))
dirs = config.template_directories() + [theme_dir]
# read in the theme
parser = ConfigParser()
parser.read(theme_file)
# complete search list for theme directories
if parser.has_option('Planet', 'template_directories'):
dirs += [os.path.join(theme_dir,dir) for dir in
parser.get('Planet', 'template_directories').split()]
config.template_directories()]
# merge configurations, allowing current one to override theme
parser.read(config_file)
parser.set('Planet', 'template_directories', ' '.join(dirs))
break
else:
# reached only when no candidate directory contained a config.ini
# (presumably the `else` of the `for` loop above -- indentation is lost)
import config, planet
log = planet.getLogger(config.log_level())
log.error('Unable to find theme %s', theme)
# Reading list support
reading_lists = config.reading_lists()
if reading_lists:
if not os.path.exists(config.cache_lists_directory()):
os.makedirs(config.cache_lists_directory())
from planet.spider import filename
for list in reading_lists:
cache_filename = filename(config.cache_lists_directory(), list)
# fetch the list and refresh its cached copy; on any failure fall back to the
# cached copy, and skip this list if that cannot be read either
try:
import urllib
data=urllib.urlopen(list).read()
cache = open(cache_filename, 'w')
cache.write(data)
cache.close()
log.debug("Using %s readinglist", list)
except:
try:
cache = open(cache_filename)
data = cache.read()
cache.close()
log.info("Using cached %s readinglist", list)
except:
log.exception("Unable to read %s readinglist", list)
continue
# merge the feeds found in the reading list into the active parser
planet.opml.opml2config(data, parser)
# planet.foaf.foaf2config(data, list, config)
def cache_sources_directory():
    """Return the `cache_sources_directory` option of the [Planet] section.

    When the option is not set, default to a `sources` directory directly
    under cache_directory().
    """
    if parser.has_option('Planet', 'cache_sources_directory'):
        # the configured value must be returned; without the `return` the
        # function yielded None whenever the option was set
        return parser.get('Planet', 'cache_sources_directory')
    else:
        return os.path.join(cache_directory(), 'sources')
def cache_lists_directory():
    """Return the `cache_lists_directory` option of the [Planet] section.

    When the option is not set, default to a `lists` directory directly
    under cache_directory().
    """
    if parser.has_option('Planet', 'cache_lists_directory'):
        # the configured value must be returned; without the `return` the
        # function yielded None whenever the option was set
        return parser.get('Planet', 'cache_lists_directory')
    else:
        return os.path.join(cache_directory(), 'lists')
def feeds():
""" list the feeds defined """
return filter(lambda feed: feed!='Planet' and feed not in template_files(),

142
planet/opml.py Executable file
View File

@ -0,0 +1,142 @@
from xml.sax import ContentHandler, make_parser, SAXParseException
from xml.sax.xmlreader import InputSource
from sgmllib import SGMLParser
from cStringIO import StringIO
from ConfigParser import ConfigParser
from htmlentitydefs import entitydefs
import re
# input = opml, output = ConfigParser
def opml2config(opml, config=None):
    """Add the feeds listed in an OPML document to a configuration.

    opml   -- the document, either as a string or as an object with a
              read() method
    config -- the ConfigParser to add to; a new one is created when omitted

    Returns the configuration.  The document is parsed with SAX first; if
    that raises SAXParseException, it is fed to the same handler again as
    SGML.
    """
    # imported here so that the module's import block is left untouched
    from xml.sax import SAXNotRecognizedException, SAXNotSupportedException
    from xml.sax.handler import feature_external_ges

    if hasattr(opml, 'read'):
        opml = opml.read()

    if config is None:
        config = ConfigParser()

    opmlParser = OpmlParser(config)

    try:
        # try SAX
        source = InputSource()
        source.setByteStream(StringIO(opml))
        parser = make_parser()
        try:
            # the document may come from a remote reading list: do not let
            # it make the parser fetch external entities
            parser.setFeature(feature_external_ges, False)
        except (SAXNotRecognizedException, SAXNotSupportedException):
            # this parser has no such switch; carry on with its defaults
            pass
        parser.setContentHandler(opmlParser)
        parser.parse(source)
    except SAXParseException:
        # try as SGML
        opmlParser.feed(opml)

    return config
# Parse OPML via either SAX or SGML
class OpmlParser(ContentHandler,SGMLParser):
    """Collect feed subscriptions from OPML `outline` elements.

    The same handler serves two parsers: SAX calls startElement() directly,
    while the SGML fallback routes every start tag through
    unknown_starttag(), which decodes the attribute values and forwards
    them to startElement().  Each accepted outline becomes a section of
    `config` named after its feed URL, with a `name` option holding the
    outline's text.
    """

    # an entity or character reference; the group captures what lies
    # between '&' and ';'
    entities = re.compile(r'&(#?\w+);')

    def __init__(self, config):
        """Remember `config`, the ConfigParser-like object to populate."""
        ContentHandler.__init__(self)
        SGMLParser.__init__(self)
        self.config = config

    def startElement(self, name, attrs):
        """Record one `outline` element as a feed, if it describes one.

        name  -- the element name
        attrs -- a mapping of attribute names to values (a SAX attributes
                 object or a plain dict); only get(), keys(), items() and
                 indexing are used, so either works
        """

        # we are only looking for data in 'outline' nodes.
        if name != 'outline': return

        # A type of 'rss' is meant to be used generically to indicate that
        # this is an entry in a subscription list, but some leave this
        # attribute off, and others have placed 'atom' in here
        kind = attrs.get('type')
        if kind is not None and kind.lower() not in ('rss', 'atom'): return

        # The feed itself is supposed to be in an attribute named 'xmlUrl'
        # (note the camel casing), but this has proven to be problematic,
        # with the most common misspelling being in all lower-case
        if not (attrs.get('xmlUrl') or '').strip():
            for attribute in attrs.keys():
                if attribute.lower() == 'xmlurl' and attrs[attribute].strip():
                    # copy before modifying: attrs may be read-only
                    attrs = dict(attrs.items())
                    attrs['xmlUrl'] = attrs[attribute]
                    break
            else:
                # no usable spelling of xmlUrl at all
                return

        # the text attribute is nominally required in OPML, but this
        # data is often found in a title attribute instead
        if not (attrs.get('text') or '').strip():
            title = attrs.get('title') or ''
            if not title.strip(): return
            attrs = dict(attrs.items())
            attrs['text'] = title

        # if we get this far, we either have a valid subscription list entry,
        # or one with a correctable error.  Add it to the configuration, if
        # it is not already there.
        xmlUrl = attrs['xmlUrl']
        if not self.config.has_section(xmlUrl):
            self.config.add_section(xmlUrl)
            self.config.set(xmlUrl, 'name', self.unescape(attrs['text']))

    def unescape(self, text):
        """Return `text`, UTF-8 encoded, with its references expanded.

        Named entities listed in `entitydefs` and decimal or hexadecimal
        character references are replaced by the character they denote.
        Anything that cannot be resolved (an unknown name, a malformed or
        out-of-range number) is left in the text exactly as written.
        """
        parsed = self.entities.split(text)

        # split() leaves the captured reference names at the odd indexes
        for i in range(1,len(parsed),2):
            reference = parsed[i]

            if reference in entitydefs:
                # named entities
                codepoint = entitydefs[reference]
                match = self.entities.match(codepoint)
                if match:
                    # defined as a character reference: resolve that below
                    reference = match.group(1)
                else:
                    parsed[i] = unichr(ord(codepoint))
                    continue

            # numeric entities
            replacement = None
            if reference.startswith('#'):
                try:
                    if reference[1:2] in ('x', 'X'):
                        replacement = unichr(int(reference[2:],16))
                    else:
                        replacement = unichr(int(reference[1:]))
                except (ValueError, OverflowError):
                    # not a number, or not a valid code point
                    replacement = None

            if replacement is None:
                # unresolvable: put back what split() took away
                replacement = u'&' + parsed[i] + u';'
            parsed[i] = replacement

        return u''.join(parsed).encode('utf-8')

    # SGML => SAX
    def unknown_starttag(self, name, attrs):
        """Decode the attribute values of a start tag and pass it on.

        name  -- the tag name
        attrs -- (name, value) pairs for the tag's attributes

        Each value is decoded as UTF-8; a value for which that fails is
        decoded as iso-8859-1 and then run through the cp1252 table.
        """
        attrs = dict(attrs)
        for attribute in attrs:
            try:
                attrs[attribute] = attrs[attribute].decode('utf-8')
            except UnicodeError:
                work = attrs[attribute].decode('iso-8859-1')
                work = u''.join([cp1252.get(c, c) for c in work])
                attrs[attribute] = work
        self.startElement(name, attrs)
# http://www.intertwingly.net/stories/2004/04/14/i18n.html#CleaningWindows
def _cp1252_table():
    """Build the character map applied to text decoded as iso-8859-1.

    Each pair below is (code point found, code point to use instead); the
    comment names the replacement character.
    """
    pairs = [
        (128, 8364),  # euro sign
        (130, 8218),  # single low-9 quotation mark
        (131,  402),  # latin small letter f with hook
        (132, 8222),  # double low-9 quotation mark
        (133, 8230),  # horizontal ellipsis
        (134, 8224),  # dagger
        (135, 8225),  # double dagger
        (136,  710),  # modifier letter circumflex accent
        (137, 8240),  # per mille sign
        (138,  352),  # latin capital letter s with caron
        (139, 8249),  # single left-pointing angle quotation mark
        (140,  338),  # latin capital ligature oe
        (142,  381),  # latin capital letter z with caron
        (145, 8216),  # left single quotation mark
        (146, 8217),  # right single quotation mark
        (147, 8220),  # left double quotation mark
        (148, 8221),  # right double quotation mark
        (149, 8226),  # bullet
        (150, 8211),  # en dash
        (151, 8212),  # em dash
        (152,  732),  # small tilde
        (153, 8482),  # trade mark sign
        (154,  353),  # latin small letter s with caron
        (155, 8250),  # single right-pointing angle quotation mark
        (156,  339),  # latin small ligature oe
        (158,  382),  # latin small letter z with caron
        (159,  376),  # latin capital letter y with diaeresis
    ]
    return dict([(unichr(found), unichr(wanted)) for found, wanted in pairs])

cp1252 = _cp1252_table()

View File

@ -9,11 +9,6 @@ from xml.dom import minidom
# Planet modules
import planet, config, feedparser, reconstitute
try:
from xml.dom.ext import PrettyPrint
except:
PrettyPrint = None
# Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
re_slash = re.compile(r'[?/:]+')
@ -44,13 +39,6 @@ def filename(directory, filename):
def write(xdoc, out):
""" write the document out to disk """
# NOTE(review): indentation is lost in this listing, and the hunk header above
# it (13 lines becoming 6) suggests the try/PrettyPrint/except/seek lines are
# the removed side of a diff -- confirm which lines are live in the real file.
file = open(out,'w')
try:
PrettyPrint(xdoc, file)
except:
# known reasons for failure include no pretty printer installed,
# and absurdly high levels of markup nesting causing Python to
# declare infinite recursion.
file.seek(0)
# serialise the whole document as UTF-8 XML
file.write(xdoc.toxml('utf-8'))
file.close()
# presumably xdoc is a minidom document, whose unlink() breaks its internal
# references once it is no longer needed -- verify against the callers
xdoc.unlink()

View File

@ -12,11 +12,11 @@ a major change in the contract between stages
import shutil, os, sys
# move up a directory
sys.path.insert(1, os.path.split(sys.path[0])[0])
os.chdir(sys.path[1])
sys.path.insert(0, os.path.split(sys.path[0])[0])
os.chdir(sys.path[0])
# copy spider output to splice input
from planet import spider
from planet import spider, config
spider.spiderPlanet('tests/data/spider/config.ini')
if os.path.exists('tests/data/splice/cache'):
shutil.rmtree('tests/data/splice/cache')
@ -38,5 +38,15 @@ source.close()
# copy splice output to apply input
from planet import splice
file=open('tests/data/apply/feed.xml', 'w')
file.write(splice.splice('tests/data/splice/config.ini').toxml('utf-8'))
data=splice.splice('tests/data/splice/config.ini').toxml('utf-8')
file.write(data)
file.close()
# copy apply output to config/reading-list input
config.load('tests/data/apply/config.ini')
splice.apply(data)
shutil.move('tests/work/apply/opml.xml', 'tests/data/config')
shutil.rmtree('tests/work')
import runtests

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,14 @@
<?xml version="1.0"?>
<opml xmlns="http://www.w3.org/1999/xhtml" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/" version="1.1">
<head>
<title>test planet</title>
<dateModified>August 21, 2006 09:40 PM</dateModified>
<ownerName>Anonymous Coward</ownerName>
<ownerEmail></ownerEmail>
</head>
<body>
<outline type="rss" text="two" title="Sam Ruby" xmlUrl="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom"/>
<outline type="rss" text="three" title="Sam Ruby" xmlUrl="tests/data/spider/testfeed3.rss"/>
<outline type="rss" text="one" title="Sam Ruby" xmlUrl="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom"/>
</body>
</opml>

View File

@ -0,0 +1,5 @@
[Planet]
name = Test Configuration
output_theme = asf
cache_directory = tests/work/config/cache
reading_lists = tests/data/config/opml.xml

View File

@ -1,15 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>http://example.com/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<summary>the Blue Planet</summary>
<updated planet:format='January 03, 2006 12:00 AM'>2006-01-03T00:00:00Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>http://example.com/3</id><link href="http://example.com/3" rel="alternate" type="text/html"/><title>Earth</title><summary>the Blue Planet</summary><updated planet:format="January 03, 2006 12:00 AM">2006-01-03T00:00:00Z</updated><source><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss" rel="alternate" type="text/html"/><link href="tests/data/spider/testfeed3.rss" rel="self" type="application/atom+xml"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><planet:name>three</planet:name></source></entry>

View File

@ -1,15 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>http://example.com/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<summary>the Red Planet</summary>
<updated planet:format='August 21, 2006 12:54 PM'>2006-08-21T12:54:31Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>http://example.com/4</id><link href="http://example.com/4" rel="alternate" type="text/html"/><title>Mars</title><summary>the Red Planet</summary><updated planet:format="August 21, 2006 09:40 PM">2006-08-21T21:40:56Z</updated><source><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss" rel="alternate" type="text/html"/><link href="tests/data/spider/testfeed3.rss" rel="self" type="application/atom+xml"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><planet:name>three</planet:name></source></entry>

View File

@ -1,22 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated planet:format='January 01, 2006 12:00 AM'>2006-01-01T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed1/1</id><link href="http://example.com/1" rel="alternate" type="text/html"/><title>Mercury</title><content>Messenger of the Roman Gods</content><updated planet:format="January 01, 2006 12:00 AM">2006-01-01T00:00:00Z</updated><source><id>tag:planet.intertwingly.net,2006:testfeed1</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>one</planet:name></source></entry>

View File

@ -1,23 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<content>the Jewel of the Sky</content>
<updated planet:format='February 02, 2006 12:00 AM'>2006-02-02T00:00:00Z</updated>
<published planet:format='January 02, 2006 12:00 AM'>2006-01-02T00:00:00Z</published>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed1/2</id><link href="http://example.com/2" rel="alternate" type="text/html"/><title>Venus</title><content>the Jewel of the Sky</content><updated planet:format="February 02, 2006 12:00 AM">2006-02-02T00:00:00Z</updated><published planet:format="January 02, 2006 12:00 AM">2006-01-02T00:00:00Z</published><source><id>tag:planet.intertwingly.net,2006:testfeed1</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>one</planet:name></source></entry>

View File

@ -1,22 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated planet:format='January 03, 2006 12:00 AM'>2006-01-03T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed1/3</id><link href="http://example.com/3" rel="alternate" type="text/html"/><title>Earth</title><content>the Blue Planet</content><updated planet:format="January 03, 2006 12:00 AM">2006-01-03T00:00:00Z</updated><source><id>tag:planet.intertwingly.net,2006:testfeed1</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>one</planet:name></source></entry>

View File

@ -1,22 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<content>the Red Planet</content>
<updated planet:format='January 04, 2006 12:00 AM'>2006-01-04T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed1/4</id><link href="http://example.com/4" rel="alternate" type="text/html"/><title>Mars</title><content>the Red Planet</content><updated planet:format="January 04, 2006 12:00 AM">2006-01-04T00:00:00Z</updated><source><id>tag:planet.intertwingly.net,2006:testfeed1</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>one</planet:name></source></entry>

View File

@ -1,22 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<content>Messenger of the Roman Gods</content>
<updated planet:format='January 01, 2006 12:00 AM'>2006-01-01T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed2/1</id><link href="http://example.com/1" rel="alternate" type="text/html"/><title>Mercury</title><content>Messenger of the Roman Gods</content><updated planet:format="January 01, 2006 12:00 AM">2006-01-01T00:00:00Z</updated><source><id>tag:planet.intertwingly.net,2006:testfeed2</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>two</planet:name></source></entry>

View File

@ -1,22 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<content>the Morning Star</content>
<updated planet:format='January 02, 2006 12:00 AM'>2006-01-02T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed2/2</id><link href="http://example.com/2" rel="alternate" type="text/html"/><title>Venus</title><content>the Morning Star</content><updated planet:format="January 02, 2006 12:00 AM">2006-01-02T00:00:00Z</updated><source><id>tag:planet.intertwingly.net,2006:testfeed2</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>two</planet:name></source></entry>

View File

@ -1,22 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/3</id>
<link href='http://example.com/3' type='text/html' rel='alternate'/>
<title>Earth</title>
<content>the Blue Planet</content>
<updated planet:format='January 03, 2006 12:00 AM'>2006-01-03T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed2/3</id><link href="http://example.com/3" rel="alternate" type="text/html"/><title>Earth</title><content>the Blue Planet</content><updated planet:format="January 03, 2006 12:00 AM">2006-01-03T00:00:00Z</updated><source><id>tag:planet.intertwingly.net,2006:testfeed2</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>two</planet:name></source></entry>

View File

@ -1,22 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2/4</id>
<link href='http://example.com/4' type='text/html' rel='alternate'/>
<title>Mars</title>
<content>the Red Planet</content>
<updated planet:format='January 04, 2006 12:00 AM'>2006-01-04T00:00:00Z</updated>
<source>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed2/4</id><link href="http://example.com/4" rel="alternate" type="text/html"/><title>Mars</title><content>the Red Planet</content><updated planet:format="January 04, 2006 12:00 AM">2006-01-04T00:00:00Z</updated><source><id>tag:planet.intertwingly.net,2006:testfeed2</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>two</planet:name></source></entry>

View File

@ -1,15 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed3/1</id>
<link href='http://example.com/1' type='text/html' rel='alternate'/>
<title>Mercury</title>
<summary>Messenger of the Roman Gods</summary>
<updated planet:format='January 01, 2006 12:00 AM'>2006-01-01T00:00:00Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed3/1</id><link href="http://example.com/1" rel="alternate" type="text/html"/><title>Mercury</title><summary>Messenger of the Roman Gods</summary><updated planet:format="January 01, 2006 12:00 AM">2006-01-01T00:00:00Z</updated><source><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss" rel="alternate" type="text/html"/><link href="tests/data/spider/testfeed3.rss" rel="self" type="application/atom+xml"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><planet:name>three</planet:name></source></entry>

View File

@ -1,15 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed3/2</id>
<link href='http://example.com/2' type='text/html' rel='alternate'/>
<title>Venus</title>
<summary>the Morning Star</summary>
<updated planet:format='August 21, 2006 12:54 PM'>2006-08-21T12:54:31Z</updated>
<source>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</source>
</entry>
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed3/2</id><link href="http://example.com/2" rel="alternate" type="text/html"/><title>Venus</title><summary>the Morning Star</summary><updated planet:format="August 21, 2006 09:40 PM">2006-08-21T21:40:56Z</updated><source><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss" rel="alternate" type="text/html"/><link href="tests/data/spider/testfeed3.rss" rel="self" type="application/atom+xml"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><planet:name>three</planet:name></source></entry>

View File

@ -1,15 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed1</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>one</planet:name>
</feed>
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed1</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed1a.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>one</planet:name></feed>

View File

@ -1,15 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<id>tag:planet.intertwingly.net,2006:testfeed2</id>
<author>
<name>Sam Ruby</name>
<email>rubys@intertwingly.net</email>
<uri>http://www.intertwingly.net/blog/</uri>
</author>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom' type='application/atom+xml' rel='self'/>
<link href='http://www.intertwingly.net/blog/' type='text/html' rel='alternate'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<updated planet:format='June 17, 2006 12:15 AM'>2006-06-17T00:15:18Z</updated>
<planet:name>two</planet:name>
</feed>
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><id>tag:planet.intertwingly.net,2006:testfeed2</id><author><name>Sam Ruby</name><email>rubys@intertwingly.net</email><uri>http://www.intertwingly.net/blog/</uri></author><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed2.atom" rel="self" type="application/atom+xml"/><link href="http://www.intertwingly.net/blog/" rel="alternate" type="text/html"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><updated planet:format="June 17, 2006 12:15 AM">2006-06-17T00:15:18Z</updated><planet:name>two</planet:name></feed>

View File

@ -1,8 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns='http://www.w3.org/2005/Atom' xmlns:planet='http://planet.intertwingly.net/'>
<link href='http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss' type='text/html' rel='alternate'/>
<link href='tests/data/spider/testfeed3.rss' type='application/atom+xml' rel='self'/>
<subtitle>Its just data</subtitle>
<title>Sam Ruby</title>
<planet:name>three</planet:name>
</feed>
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:planet="http://planet.intertwingly.net/"><link href="http://intertwingly.net/code/venus/tests/data/spider/testfeed3.rss" rel="alternate" type="text/html"/><link href="tests/data/spider/testfeed3.rss" rel="self" type="application/atom+xml"/><subtitle>Its just data</subtitle><title>Sam Ruby</title><planet:name>three</planet:name></feed>

169
tests/test_opml.py Normal file
View File

@ -0,0 +1,169 @@
#!/usr/bin/env python
import unittest
from planet.opml import opml2config
from ConfigParser import ConfigParser
class OpmlTest(unittest.TestCase):
    """
    Exercise opml2config against single-element OPML snippets.

    Every test converts one snippet into a fresh ConfigParser and then
    checks either the 'name' option stored under the feed URL section,
    or that no such section was created at all.
    """

    # section name (the feed URL) that the assertions look up
    _FEED = "http://example.com/feed.xml"

    def setUp(self):
        # each test starts from an empty configuration
        self.config = ConfigParser()

    def _convert(self, snippet):
        # run the function under test against this test's configuration
        opml2config(snippet, self.config)

    def _expect_name(self, wanted):
        # the feed section must exist and carry the given name
        self.assertEqual(wanted, self.config.get(self._FEED, 'name'))

    def _expect_skipped(self):
        # the snippet must not have produced a section for the feed URL
        self.assertFalse(self.config.has_section(self._FEED))

    #
    # Element
    #

    def test_outline_element(self):
        """An outline element yields a section named after its text."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_name('sample feed')

    def test_wrong_element(self):
        """An element other than outline is ignored."""
        self._convert('''<feed type="rss"
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_skipped()

    def test_illformed_xml_before(self):
        """Ill-formed markup ahead of the outline does not hide it."""
        self._convert('''<bad stuff before>
<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_name('sample feed')

    def test_illformed_xml_after(self):
        """Ill-formed markup following the outline does not discard it."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>
<bad stuff after>''')
        self._expect_name('sample feed')

    #
    # Type
    #

    def test_type_missing(self):
        """An outline without a type attribute is still accepted."""
        self._convert('''<outline
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_name('sample feed')

    def test_type_uppercase(self):
        """The type value is accepted in upper case."""
        self._convert('''<outline type="RSS"
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_name('sample feed')

    def test_type_atom(self):
        """type="atom" is accepted as well as rss."""
        self._convert('''<outline type="atom"
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_name('sample feed')

    def test_wrong_type(self):
        """An outline of some other type is ignored."""
        self._convert('''<outline type="other"
xmlUrl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_skipped()

    #
    # xmlUrl
    #

    def test_xmlurl_wrong_case(self):
        """The attribute spelled 'xmlurl' is treated like 'xmlUrl'."""
        self._convert('''<outline type="rss"
xmlurl="http://example.com/feed.xml"
text="sample feed"/>''')
        self._expect_name('sample feed')

    def test_missing_xmlUrl(self):
        """Without an xmlUrl attribute no feed section is created."""
        self._convert('''<outline type="rss"
text="sample feed"/>''')
        self._expect_skipped()

    def test_blank_xmlUrl(self):
        """An empty xmlUrl does not create an empty-named section."""
        self._convert('''<outline type="rss"
xmlUrl=""
text="sample feed"/>''')
        self.assertFalse(self.config.has_section(""))

    #
    # text
    #

    def test_title_attribute(self):
        """The title attribute supplies the name when text is absent."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
title="sample feed"/>''')
        self._expect_name('sample feed')

    def test_missing_text(self):
        """With neither text nor title the outline is ignored."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
/>''')
        self._expect_skipped()

    def test_blank_text_no_title(self):
        """An empty text with no title is ignored."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text=""/>''')
        self._expect_skipped()

    def test_blank_text_with_title(self):
        """An empty text falls back to a non-empty title."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text=""
title="sample feed"/>''')
        self._expect_name('sample feed')

    def test_blank_text_blank_title(self):
        """Empty text together with an empty title is ignored."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text=""
title=""/>''')
        self._expect_skipped()

    def test_text_utf8(self):
        """UTF-8 encoded text comes back unchanged."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text="Se\xc3\xb1or Frog\xe2\x80\x99s"/>''')
        self._expect_name('Se\xc3\xb1or Frog\xe2\x80\x99s')

    def test_text_win_1252(self):
        """Text given as bytes \\xf1 / \\x92 comes back as UTF-8."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text="Se\xf1or Frog\x92s"/>''')
        self._expect_name('Se\xc3\xb1or Frog\xe2\x80\x99s')

    def test_text_entity(self):
        """Named entities in the text come back as UTF-8 characters."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text="Se&ntilde;or Frog&rsquo;s"/>''')
        self._expect_name('Se\xc3\xb1or Frog\xe2\x80\x99s')

    def test_text_double_escaped(self):
        """Double-escaped entities (&amp;ntilde;) are resolved too."""
        self._convert('''<outline type="rss"
xmlUrl="http://example.com/feed.xml"
text="Se&amp;ntilde;or Frog&amp;rsquo;s"/>''')
        self._expect_name('Se\xc3\xb1or Frog\xe2\x80\x99s')
# Run the OPML test cases when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()

53
tests/test_rlists.py Normal file
View File

@ -0,0 +1,53 @@
#!/usr/bin/env python
import unittest, os, shutil
from planet import config, opml
from os.path import split
from glob import glob
from ConfigParser import ConfigParser
# Scratch cache directory: removed again in tearDown, and test_cache looks
# for cached reading lists in its 'lists' subdirectory.
workdir = 'tests/work/config/cache'
class ReadingListTest(unittest.TestCase):
    """
    Check that feeds coming from a reading list show up in the configuration.

    setUp loads tests/data/config/rlist.ini; the tests then inspect
    config.feeds(), config.feed_options() and the files left under
    workdir/lists.
    """

    def setUp(self):
        # loading the configuration is what populates the cache under
        # workdir -- presumably via the reading list support in config.load
        config.load('tests/data/config/rlist.ini')

    def tearDown(self):
        # drop the cache directory, then prune its parent (os.removedirs
        # also removes further parent directories that are left empty)
        shutil.rmtree(workdir)
        os.removedirs(os.path.split(workdir)[0])

    # administrivia

    def test_feeds(self):
        """All three test feeds are known to the configuration."""
        feeds = [split(feed)[1] for feed in config.feeds()]
        feeds.sort()
        self.assertEqual(['testfeed1a.atom', 'testfeed2.atom', 'testfeed3.rss'],
                         feeds)

    # dictionaries

    def test_feed_options(self):
        """Per-feed options carry the expected 'name' values."""
        feeds = dict([(split(feed)[1], feed) for feed in config.feeds()])

        feed1 = feeds['testfeed1a.atom']
        self.assertEqual('one', config.feed_options(feed1)['name'])

        feed2 = feeds['testfeed2.atom']
        self.assertEqual('two', config.feed_options(feed2)['name'])

    # cache

    def test_cache(self):
        """Exactly one reading list is cached and it lists all three feeds."""
        cache = glob(os.path.join(workdir, 'lists', '*'))
        # was assertTrue(1, len(cache)), which can never fail: the first
        # argument is the tested expression, the second only the message
        self.assertEqual(1, len(cache))

        # try/finally rather than 'with' to stay compatible with the old
        # Python 2 releases this module targets
        cached = open(cache[0])
        try:
            data = cached.read()
        finally:
            cached.close()

        parser = ConfigParser()
        opml.opml2config(data, parser)

        feeds = [split(feed)[1] for feed in parser.sections()]
        feeds.sort()
        self.assertEqual(['testfeed1a.atom', 'testfeed2.atom', 'testfeed3.rss'],
                         feeds)