diff --git a/planet/__init__.py b/planet/__init__.py
index 9babf19..e24473c 100644
--- a/planet/__init__.py
+++ b/planet/__init__.py
@@ -2,9 +2,13 @@ xmlns = 'http://planet.intertwingly.net/'
logger = None
+import os, sys, re
import config
config.__init__()
+from ConfigParser import ConfigParser
+from urlparse import urljoin
+
def getLogger(level):
""" get a logger with the specified log level """
global logger
@@ -48,3 +52,85 @@ def setTimeout(timeout):
logger.info("Socket timeout set to %d seconds", timeout)
else:
logger.error("Unable to set timeout to %d seconds", timeout)
+
+def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
+ global logger
+ try:
+
+ import urllib2, StringIO
+ from planet.spider import filename
+
+ # list cache file name
+ cache_filename = filename(config.cache_lists_directory(), list)
+
+ # retrieve list options (e.g., etag, last-modified) from cache
+ options = {}
+
+ # add original options
+ for key, value in orig_config.items(list):
+ options[key] = value
+
+ try:
+ if use_cache:
+ cached_config = ConfigParser()
+ cached_config.read(cache_filename)
+ for option in cached_config.options(list):
+ options[option] = cached_config.get(list,option)
+ except:
+ pass
+
+ cached_config = ConfigParser()
+ cached_config.add_section(list)
+ for key, value in options.items():
+ cached_config.set(list, key, value)
+
+ # read list
+ base = urljoin('file:', os.path.abspath(os.path.curdir))
+ request = urllib2.Request(urljoin(base + '/', list))
+ if options.has_key("etag"):
+ request.add_header('If-None-Match', options['etag'])
+ if options.has_key("last-modified"):
+ request.add_header('If-Modified-Since',
+ options['last-modified'])
+ response = urllib2.urlopen(request)
+ if response.headers.has_key('etag'):
+ cached_config.set(list, 'etag', response.headers['etag'])
+ if response.headers.has_key('last-modified'):
+ cached_config.set(list, 'last-modified',
+ response.headers['last-modified'])
+
+ # buffer the response; the callback converts it to config.ini form
+ data = StringIO.StringIO(response.read())
+
+ if callback: callback(data, cached_config)
+
+ # write to cache
+ if use_cache:
+ cache = open(cache_filename, 'w')
+ cached_config.write(cache)
+ cache.close()
+
+ # re-parse and proceed
+ logger.debug("Using %s readinglist", list)
+ if re_read:
+ if use_cache:
+ orig_config.read(cache_filename)
+ else:
+ cdata = StringIO.StringIO()
+ cached_config.write(cdata)
+ cdata.seek(0)
+ orig_config.readfp(cdata)
+ except:
+ try:
+ if re_read:
+ if use_cache:
+ orig_config.read(cache_filename)
+ else:
+ cdata = StringIO.StringIO()
+ cached_config.write(cdata)
+ cdata.seek(0)
+ orig_config.readfp(cdata)
+ logger.info("Using cached %s readinglist", list)
+ except:
+ logger.exception("Unable to read %s readinglist", list)
+
diff --git a/planet/config.py b/planet/config.py
index 312c1f0..b3306de 100644
--- a/planet/config.py
+++ b/planet/config.py
@@ -169,70 +169,17 @@ def load(config_file):
if reading_lists:
if not os.path.exists(config.cache_lists_directory()):
os.makedirs(config.cache_lists_directory())
- from planet.spider import filename
- for list in reading_lists:
- cache_filename = filename(config.cache_lists_directory(), list)
- try:
- import urllib2, StringIO
- # retrieve list options (e.g., etag, last-modified) from cache
- options = {}
-
- # add original options
- for key, value in parser.items(list):
- options[key] = value
-
- try:
- cached_config = ConfigParser()
- cached_config.read(cache_filename)
- for option in cached_config.options(list):
- options[option] = cached_config.get(list,option)
- except:
- pass
- cached_config = ConfigParser()
- cached_config.add_section(list)
- for key, value in options.items():
- cached_config.set(list, key, value)
-
- # read list
- base = urljoin('file:', os.path.abspath(os.path.curdir))
- request = urllib2.Request(urljoin(base + '/', list))
- if options.has_key("etag"):
- request.add_header('If-None-Match', options['etag'])
- if options.has_key("last-modified"):
- request.add_header('If-Modified-Since',
- options['last-modified'])
- response = urllib2.urlopen(request)
- if response.headers.has_key('etag'):
- cached_config.set(list, 'etag', response.headers['etag'])
- if response.headers.has_key('last-modified'):
- cached_config.set(list, 'last-modified',
- response.headers['last-modified'])
-
- # convert to config.ini
- data=StringIO.StringIO(response.read())
+ def data2config(data, cached_config):
if content_type(list).find('opml')>=0:
opml.opml2config(data, cached_config)
elif content_type(list).find('foaf')>=0:
foaf.foaf2config(data, cached_config)
- if cached_config.sections() in [[], [list]]: raise Exception
+ if cached_config.sections() in [[], [list]]:
+ raise Exception
- # write to cache
- cache = open(cache_filename, 'w')
- cached_config.write(cache)
- cache.close()
-
- # re-parse and proceed
- log.debug("Using %s readinglist", list)
- data.seek(0)
- parser.read(cache_filename)
- except:
- try:
- parser.read(cache_filename)
- log.info("Using cached %s readinglist", list)
- except:
- log.exception("Unable to read %s readinglist", list)
- continue
+ for list in reading_lists:
+ planet.downloadReadingList(list, parser, data2config)
def cache_sources_directory():
if parser.has_option('Planet', 'cache_sources_directory'):
diff --git a/planet/foaf.py b/planet/foaf.py
index 9a0e5b5..5f97d3c 100644
--- a/planet/foaf.py
+++ b/planet/foaf.py
@@ -1,24 +1,6 @@
from ConfigParser import ConfigParser
-# input = foaf, output = ConfigParser
-def foaf2config(rdf, config=None):
-
- if not config or not config.sections():
- config = ConfigParser()
-
- section = config.sections().pop()
-
- try:
- from RDF import Model, NS, Parser, Statement
- except:
- return config
-
- if hasattr(rdf, 'read'):
- rdf = rdf.read()
-
- # account mappings, none by default
- # form: accounts = {url to service homepage (as found in FOAF)}|{URI template}\n*
- # example: http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName}
+def load_accounts(config, section):
accounts = {}
if(config.has_option(section, 'online_accounts')):
values = config.get(section, 'online_accounts')
@@ -29,62 +11,166 @@ def foaf2config(rdf, config=None):
except:
pass
- model = Model()
+ return accounts
+
+def load_model(rdf, base_uri):
+
+ if hasattr(rdf, 'find_statements'):
+ return rdf
+
+ if hasattr(rdf, 'read'):
+ rdf = rdf.read()
+
def handler(code, level, facility, message, line, column, byte, file, uri):
pass
- Parser().parse_string_into_model(model,rdf,section,handler)
+
+ from RDF import Model, Parser
+
+ model = Model()
+
+ Parser().parse_string_into_model(model,rdf,base_uri,handler)
+
+ return model
+
+# input = foaf, output = ConfigParser
+def foaf2config(rdf, config, subject=None):
+
+ if not config or not config.sections():
+ return
+
+ # there should only be 1 section
+ section = config.sections().pop()
+
+ try:
+ from RDF import Model, NS, Parser, Statement
+ except:
+ return
+
+ # account mappings, none by default
+ # form: accounts = {url to service homepage (as found in FOAF)}|{URI template}\n*
+ # example: http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName}
+ accounts = load_accounts(config, section)
+
+ depth = 0
+
+ if(config.has_option(section, 'depth')):
+ depth = config.getint(section, 'depth')
+
+ model = load_model(rdf, section)
dc = NS('http://purl.org/dc/elements/1.1/')
foaf = NS('http://xmlns.com/foaf/0.1/')
rdfs = NS('http://www.w3.org/2000/01/rdf-schema#')
- for statement in model.find_statements(Statement(None,foaf.weblog,None)):
+ for statement in model.find_statements(Statement(subject,foaf.weblog,None)):
# feed owner
person = statement.subject
-
- feed = model.get_target(statement.object,rdfs.seeAlso)
- if not feed: continue
+ # title is required (at the moment)
title = model.get_target(person,foaf.name)
if not title: title = model.get_target(statement.object,dc.title)
- if not title: continue
-
- feed = str(feed.uri)
- if not config.has_section(feed):
- config.add_section(feed)
- config.set(feed, 'name', str(title))
-
- # if we don't have mappings, we're done
- if not accounts.keys():
+ if not title:
continue
- # now look for OnlineAccounts for the same person
- for statement in model.find_statements(Statement(person,foaf.holdsAccount,None)):
- rdfaccthome = model.get_target(statement.object,foaf.accountServiceHomepage)
- rdfacctname = model.get_target(statement.object,foaf.accountName)
-
- if not rdfaccthome or not rdfacctname: continue
-
- if not rdfaccthome.is_resource() or not accounts.has_key(str(rdfaccthome.uri)): continue
-
- if not rdfacctname.is_literal(): continue
-
- rdfacctname = rdfacctname.literal_value['string']
- rdfaccthome = str(rdfaccthome.uri)
-
- # shorten feed title a bit
- try:
- servicetitle = rdfaccthome.replace('http://','').split('/')[0]
- except:
- servicetitle = rdfaccthome
-
- feed = accounts[rdfaccthome].replace("{foaf:accountName}", rdfacctname)
+ # the blog's feed (rdfs:seeAlso) is optional
+ # TODO: check for rdf:type rss:channel
+ feed = model.get_target(statement.object,rdfs.seeAlso)
+ if feed:
+ feed = str(feed.uri)
if not config.has_section(feed):
config.add_section(feed)
- config.set(feed, 'name', "%s (%s)" % (title, servicetitle))
+ config.set(feed, 'name', str(title))
- return config
+ # now look for OnlineAccounts for the same person
+ if accounts.keys():
+ for statement in model.find_statements(Statement(person,foaf.holdsAccount,None)):
+ rdfaccthome = model.get_target(statement.object,foaf.accountServiceHomepage)
+ rdfacctname = model.get_target(statement.object,foaf.accountName)
+
+ if not rdfaccthome or not rdfacctname: continue
+
+ if not rdfaccthome.is_resource() or not accounts.has_key(str(rdfaccthome.uri)): continue
+
+ if not rdfacctname.is_literal(): continue
+
+ rdfacctname = rdfacctname.literal_value['string']
+ rdfaccthome = str(rdfaccthome.uri)
+
+ # shorten feed title a bit
+ try:
+ servicetitle = rdfaccthome.replace('http://','').split('/')[0]
+ except:
+ servicetitle = rdfaccthome
+
+ feed = accounts[rdfaccthome].replace("{foaf:accountName}", rdfacctname)
+ if not config.has_section(feed):
+ config.add_section(feed)
+ config.set(feed, 'name', "%s (%s)" % (title, servicetitle))
+
+ if depth > 0:
+
+ # now the fun part, let's go after more friends
+ for statement in model.find_statements(Statement(person,foaf.knows,None)):
+ friend = statement.object
+
+ # let's be safe
+ if friend.is_literal(): continue
+
+ seeAlso = model.get_target(friend,rdfs.seeAlso)
+
+ # no rdfs:seeAlso resource to follow
+ if not seeAlso or not seeAlso.is_resource(): continue
+
+ seeAlso = str(seeAlso.uri)
+
+ if not config.has_section(seeAlso):
+ config.add_section(seeAlso)
+ config.set(seeAlso, 'content_type', 'foaf')
+ config.set(seeAlso, 'depth', str(depth - 1))
+
+ try:
+ import planet
+ planet.downloadReadingList(seeAlso, config,
+ lambda data, subconfig : friend2config(model, friend, seeAlso, subconfig, data),
+ False)
+ except:
+ pass
+
+ return
+
+def friend2config(friend_model, friend, seeAlso, subconfig, data):
+
+ try:
+ from RDF import Model, NS, Parser, Statement
+ except:
+ return
+
+ dc = NS('http://purl.org/dc/elements/1.1/')
+ foaf = NS('http://xmlns.com/foaf/0.1/')
+ rdf = NS('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+ rdfs = NS('http://www.w3.org/2000/01/rdf-schema#')
+
+ # FOAF InverseFunctionalProperties
+ ifps = [foaf.mbox, foaf.mbox_sha1sum, foaf.jabberID, foaf.aimChatID,
+ foaf.icqChatID, foaf.yahooChatID, foaf.msnChatID, foaf.homepage, foaf.weblog]
+
+ model = load_model(data, seeAlso)
+
+ for statement in model.find_statements(Statement(None,rdf.type,foaf.Person)):
+
+ samefriend = statement.subject
+
+ # maybe they have the same uri
+ if friend.is_resource() and samefriend.is_resource():
+ # TODO
+ pass
+
+ for ifp in ifps:
+ object = model.get_target(samefriend,ifp)
+ if object and object == friend_model.get_target(friend, ifp):
+ foaf2config(model, subconfig, samefriend)
+ return
if __name__ == "__main__":
import sys, urllib
diff --git a/tests/data/config/another.foaf b/tests/data/config/another.foaf
new file mode 100644
index 0000000..2e5bbb5
--- /dev/null
+++ b/tests/data/config/another.foaf
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+ Another Elias Torres
+
+
+
+ Elias Torres
+
+
+
+
+
+
+
+
+
+
+ SOMEID
+
+
+
+
+
+ SOMEID
+
+
+
+
+
diff --git a/tests/data/config/eliast.foaf b/tests/data/config/eliast.foaf
index a9e2f7a..1c56f08 100644
--- a/tests/data/config/eliast.foaf
+++ b/tests/data/config/eliast.foaf
@@ -12,167 +12,44 @@
Elias Torres
- Elias
- Torres
- EliasT
- 171e69034f0c1563ed13d66d4abaa8b5d70e4e57
-
-
-
-
- rico811
- elias_torres@hotmail.com
-
-
- Dan Smith
-
- bd4506f3e280442e626b7123d65dc6d7af348906
-
-
Lee Feigenbaum
15b51eca0082d66bee850ce4774ff2d9921c1f08
-
-
-
-
-
- Dan Connolly
-
-
-
-
-
-
-
-
-
- James Snell
-
-
+
Sam Ruby
703471c6f39094d88665d24ce72c42fdc5f20585
+
-
-
- Mark Pilgrim
- 85d089d9dc87139d5542aa4ee2822bf65e56b55e
-
-
-
-
- Eric Miller
- fd1c7fa497930b8b24e3998927fcebe63509ef20
-
-
-
-
-
-
- Libby Miller
- 80f78952ba7c71bc9a0a38de415692922a3f6024
- 289d4d44325d0b0218edc856c8c3904fa3fd2875
-
-
-
-
- Dave Beckett
- 970987f991961f2553a1bf2574166fa29befbccb
-
-
-
-
-
- Kendall Grant Clark
-
-
-
-
-
-
-
- Bijan Parsia
- f49a6854842c5fa76dc0edb8e82f8fe04fd56bc9
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+ Elias Torres
+
+
+
+
+
-
-
- Elias Torres
-
-
-
-
-
-
-
-
-
+
+
+
+ eliast
+
+
+
+
+
+ 77366516@N00
+
+
-
-
-
- eliast
-
-
-
-
-
- 77366516@N00
-
-
-
-
-
-
-
-
-
diff --git a/tests/data/config/foaf-deep.ini b/tests/data/config/foaf-deep.ini
new file mode 100644
index 0000000..a38e409
--- /dev/null
+++ b/tests/data/config/foaf-deep.ini
@@ -0,0 +1,7 @@
+[Planet]
+name = FOAF Test Configuration
+cache_directory = tests/work/config/cache
+
+[tests/data/config/eliast.foaf]
+content_type = foaf
+depth=1
diff --git a/tests/data/config/foaf-multiple.ini b/tests/data/config/foaf-multiple.ini
new file mode 100644
index 0000000..3272baf
--- /dev/null
+++ b/tests/data/config/foaf-multiple.ini
@@ -0,0 +1,18 @@
+[Planet]
+name = FOAF Test Configuration
+cache_directory = tests/work/config/cache
+
+[tests/data/config/eliast.foaf]
+content_type = foaf
+random_setting = eliast
+online_accounts =
+ http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName}
+ http://flickr.com/|http://api.flickr.com/services/feeds/photos_public.gne?id={foaf:accountName}
+
+[tests/data/config/another.foaf]
+content_type = foaf
+random_setting = another
+online_accounts =
+ http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName}
+ http://flickr.com/|http://api.flickr.com/services/feeds/photos_public.gne?id={foaf:accountName}
+
diff --git a/tests/data/config/foaf.ini b/tests/data/config/foaf.ini
index db7a7fb..281a995 100644
--- a/tests/data/config/foaf.ini
+++ b/tests/data/config/foaf.ini
@@ -7,4 +7,3 @@ content_type = foaf
online_accounts =
http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName}
http://flickr.com/|http://api.flickr.com/services/feeds/photos_public.gne?id={foaf:accountName}
-
diff --git a/tests/data/config/ldf-card.foaf b/tests/data/config/ldf-card.foaf
new file mode 100644
index 0000000..50a0855
--- /dev/null
+++ b/tests/data/config/ldf-card.foaf
@@ -0,0 +1,33 @@
+
+
+
+
+
+ Lee Feigenbaum
+ 15b51eca0082d66bee850ce4774ff2d9921c1f08
+
+
+
+ Lee Feigenbaum
+
+
+
+
+
+
+
+
+
+ leef
+
+
+
+
+
diff --git a/tests/data/config/rubys-card.foaf b/tests/data/config/rubys-card.foaf
new file mode 100644
index 0000000..7bab6ac
--- /dev/null
+++ b/tests/data/config/rubys-card.foaf
@@ -0,0 +1,33 @@
+
+
+
+
+
+ Sam Ruby
+ 703471c6f39094d88665d24ce72c42fdc5f20585
+
+
+
+ Its just data.
+
+
+
+
+
+
+
+
+
+ rubys
+
+
+
+
+
diff --git a/tests/test_foaf.py b/tests/test_foaf.py
index adc2022..4d9b355 100644
--- a/tests/test_foaf.py
+++ b/tests/test_foaf.py
@@ -91,6 +91,28 @@ class FoafTest(unittest.TestCase):
'http://del.icio.us/rss/eliast',
'http://torrez.us/feed/rdf'], feeds)
+ def test_multiple_subscriptions(self):
+ config.load('tests/data/config/foaf-multiple.ini')
+ self.assertEqual(2,len(config.reading_lists()))
+ feeds = config.subscriptions()
+ feeds.sort()
+ self.assertEqual(5,len(feeds))
+ self.assertEqual(['http://api.flickr.com/services/feeds/' +
+ 'photos_public.gne?id=77366516@N00',
+ 'http://api.flickr.com/services/feeds/' +
+ 'photos_public.gne?id=SOMEID',
+ 'http://del.icio.us/rss/SOMEID',
+ 'http://del.icio.us/rss/eliast',
+ 'http://torrez.us/feed/rdf'], feeds)
+
+ def test_recursive(self):
+ config.load('tests/data/config/foaf-deep.ini')
+ feeds = config.subscriptions()
+ feeds.sort()
+ self.assertEqual(['http://intertwingly.net/blog/atom.xml',
+ 'http://thefigtrees.net/lee/life/atom.xml',
+ 'http://torrez.us/feed/rdf'], feeds)
+
# these tests only make sense if libRDF is installed
try:
import RDF