Added basic support for FOAF recursion

[section]
content_type = foaf
depth = 1
This commit is contained in:
Elias Torres 2006-09-07 05:51:56 -04:00
parent a166531855
commit 525f8d9351
8 changed files with 330 additions and 255 deletions

View File

@ -2,9 +2,13 @@ xmlns = 'http://planet.intertwingly.net/'
logger = None
import os, sys, re
import config
config.__init__()
from ConfigParser import ConfigParser
from urlparse import urljoin
def getLogger(level):
""" get a logger with the specified log level """
global logger
@ -48,3 +52,85 @@ def setTimeout(timeout):
logger.info("Socket timeout set to %d seconds", timeout)
else:
logger.error("Unable to set timeout to %d seconds", timeout)
def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
    """ download a reading list and merge the result into orig_config

    list        -- section name of the reading list; it is also the location
                   that gets fetched, resolved against the current directory
    orig_config -- ConfigParser that supplies the options for the list and,
                   when re_read is true, receives the resulting sections
    callback    -- optional callable, invoked as callback(data, cached_config)
                   with the downloaded body (a StringIO) and the ConfigParser
                   being built for this list
    use_cache   -- read previously stored options from, and write the result
                   to, the list's file under config.cache_lists_directory()
    re_read     -- feed the result back into orig_config

    Errors are not propagated (KeyboardInterrupt and SystemExit excepted):
    on failure the function falls back to re-reading whatever is available
    and, if that fails as well, logs the exception.
    """
    global logger

    def reread():
        # Merge the result into orig_config; returns True if anything was
        # actually re-read.  Shared by the success and the fallback path.
        if not re_read:
            return False
        if use_cache:
            orig_config.read(cache_filename)
        else:
            # no cache file to read from: round-trip through memory instead
            cdata = StringIO.StringIO()
            cached_config.write(cdata)
            cdata.seek(0)
            orig_config.readfp(cdata)
        return True

    try:
        import urllib2, StringIO
        from planet.spider import filename

        # list cache file name
        cache_filename = filename(config.cache_lists_directory(), list)

        # start from the original options ...
        options = dict(orig_config.items(list))

        # ... and overlay list options (e.g., etag, last-modified) from cache
        if use_cache:
            try:
                previous = ConfigParser()
                previous.read(cache_filename)
                for option in previous.options(list):
                    options[option] = previous.get(list, option)
            except Exception:
                # best effort: the cache may be missing or lack this section
                pass

        cached_config = ConfigParser()
        cached_config.add_section(list)
        for key, value in options.items():
            cached_config.set(list, key, value)

        # read list, making the request conditional when validators are known
        base = urljoin('file:', os.path.abspath(os.path.curdir))
        request = urllib2.Request(urljoin(base + '/', list))
        if 'etag' in options:
            request.add_header('If-None-Match', options['etag'])
        if 'last-modified' in options:
            request.add_header('If-Modified-Since', options['last-modified'])
        response = urllib2.urlopen(request)

        # remember the validators for the next run
        if response.headers.has_key('etag'):
            cached_config.set(list, 'etag', response.headers['etag'])
        if response.headers.has_key('last-modified'):
            cached_config.set(list, 'last-modified',
                response.headers['last-modified'])

        # convert to config.ini
        data = StringIO.StringIO(response.read())
        if callback:
            callback(data, cached_config)

        # write to cache; close the file even if the write fails
        if use_cache:
            cache = open(cache_filename, 'w')
            try:
                cached_config.write(cache)
            finally:
                cache.close()

        # re-parse and proceed
        logger.debug("Using %s readinglist", list)
        reread()
    except (KeyboardInterrupt, SystemExit):
        # never swallow a user interrupt or an interpreter exit
        raise
    except:
        # anything else (network error, unmodified list, callback failure):
        # fall back to what is already available
        try:
            if reread():
                logger.info("Using cached %s readinglist", list)
        except:
            logger.exception("Unable to read %s readinglist", list)

View File

@ -169,70 +169,17 @@ def load(config_file):
if reading_lists:
if not os.path.exists(config.cache_lists_directory()):
os.makedirs(config.cache_lists_directory())
from planet.spider import filename
for list in reading_lists:
cache_filename = filename(config.cache_lists_directory(), list)
try:
import urllib2, StringIO
# retrieve list options (e.g., etag, last-modified) from cache
options = {}
# add original options
for key, value in parser.items(list):
options[key] = value
try:
cached_config = ConfigParser()
cached_config.read(cache_filename)
for option in cached_config.options(list):
options[option] = cached_config.get(list,option)
except:
pass
cached_config = ConfigParser()
cached_config.add_section(list)
for key, value in options.items():
cached_config.set(list, key, value)
# read list
base = urljoin('file:', os.path.abspath(os.path.curdir))
request = urllib2.Request(urljoin(base + '/', list))
if options.has_key("etag"):
request.add_header('If-None-Match', options['etag'])
if options.has_key("last-modified"):
request.add_header('If-Modified-Since',
options['last-modified'])
response = urllib2.urlopen(request)
if response.headers.has_key('etag'):
cached_config.set(list, 'etag', response.headers['etag'])
if response.headers.has_key('last-modified'):
cached_config.set(list, 'last-modified',
response.headers['last-modified'])
# convert to config.ini
data=StringIO.StringIO(response.read())
def data2config(data, cached_config):
if content_type(list).find('opml')>=0:
opml.opml2config(data, cached_config)
elif content_type(list).find('foaf')>=0:
foaf.foaf2config(data, cached_config)
if cached_config.sections() in [[], [list]]: raise Exception
if cached_config.sections() in [[], [list]]:
raise Exception
# write to cache
cache = open(cache_filename, 'w')
cached_config.write(cache)
cache.close()
# re-parse and proceed
log.debug("Using %s readinglist", list)
data.seek(0)
parser.read(cache_filename)
except:
try:
parser.read(cache_filename)
log.info("Using cached %s readinglist", list)
except:
log.exception("Unable to read %s readinglist", list)
continue
for list in reading_lists:
planet.downloadReadingList(list, parser, data2config)
def cache_sources_directory():
if parser.has_option('Planet', 'cache_sources_directory'):

View File

@ -1,7 +1,39 @@
from ConfigParser import ConfigParser
def load_accounts(config, section):
    """ read the online account mappings configured for a section

    The 'online_accounts' option of the section holds one mapping per line,
    in the form {service homepage}|{feed URI template}.  Returns a dict that
    maps each homepage to its template; the dict is empty when the option is
    absent.  Lines that do not contain exactly one '|' are skipped.
    """
    accounts = {}
    if not config.has_option(section, 'online_accounts'):
        return accounts

    for account_map in config.get(section, 'online_accounts').split('\n'):
        try:
            homepage, template = account_map.split('|')
        except ValueError:
            # wrong number of fields on this line: ignore the entry
            continue
        accounts[homepage] = template
    return accounts
def load_model(rdf, base_uri):
    """ return an RDF model for the given input

    rdf      -- an object that already offers find_statements (returned
                unchanged), a file-like object (its read() result is
                parsed), or the data to parse itself
    base_uri -- handed to the parser together with the data
    """
    # something that can already be queried needs no parsing
    already_parsed = hasattr(rdf, 'find_statements')
    if already_parsed:
        return rdf

    # file-like input: parse its content
    if hasattr(rdf, 'read'):
        rdf = rdf.read()

    # handed to the parser as its last argument; deliberately does nothing
    # (presumably this keeps parser messages quiet -- confirm against the
    # RDF bindings)
    def handler(code, level, facility, message, line, column, byte, file, uri):
        pass

    from RDF import Model, Parser

    parsed = Model()
    parser = Parser()
    parser.parse_string_into_model(parsed, rdf, base_uri, handler)
    return parsed
# input = foaf, output = ConfigParser
def foaf2config(rdf, config):
def foaf2config(rdf, config, subject=None):
if not config or not config.sections():
return
@ -14,79 +46,132 @@ def foaf2config(rdf, config):
except:
return
if hasattr(rdf, 'read'):
rdf = rdf.read()
# account mappings, none by default
# form: accounts = {url to service homepage (as found in FOAF)}|{URI template}\n*
# example: http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName}
accounts = {}
if(config.has_option(section, 'online_accounts')):
values = config.get(section, 'online_accounts')
for account_map in values.split('\n'):
try:
homepage, map = account_map.split('|')
accounts[homepage] = map
except:
pass
accounts = load_accounts(config, section)
model = Model()
def handler(code, level, facility, message, line, column, byte, file, uri):
pass
Parser().parse_string_into_model(model,rdf,section,handler)
depth = 0
if(config.has_option(section, 'depth')):
depth = config.getint(section, 'depth')
model = load_model(rdf, section)
dc = NS('http://purl.org/dc/elements/1.1/')
foaf = NS('http://xmlns.com/foaf/0.1/')
rdfs = NS('http://www.w3.org/2000/01/rdf-schema#')
for statement in model.find_statements(Statement(None,foaf.weblog,None)):
for statement in model.find_statements(Statement(subject,foaf.weblog,None)):
# feed owner
person = statement.subject
feed = model.get_target(statement.object,rdfs.seeAlso)
if not feed: continue
# title is required (at the moment)
title = model.get_target(person,foaf.name)
if not title: title = model.get_target(statement.object,dc.title)
if not title: continue
feed = str(feed.uri)
if not config.has_section(feed):
config.add_section(feed)
config.set(feed, 'name', str(title))
# if we don't have mappings, we're done
if not accounts.keys():
if not title:
continue
# now look for OnlineAccounts for the same person
for statement in model.find_statements(Statement(person,foaf.holdsAccount,None)):
rdfaccthome = model.get_target(statement.object,foaf.accountServiceHomepage)
rdfacctname = model.get_target(statement.object,foaf.accountName)
if not rdfaccthome or not rdfacctname: continue
if not rdfaccthome.is_resource() or not accounts.has_key(str(rdfaccthome.uri)): continue
if not rdfacctname.is_literal(): continue
rdfacctname = rdfacctname.literal_value['string']
rdfaccthome = str(rdfaccthome.uri)
# shorten feed title a bit
try:
servicetitle = rdfaccthome.replace('http://','').split('/')[0]
except:
servicetitle = rdfaccthome
feed = accounts[rdfaccthome].replace("{foaf:accountName}", rdfacctname)
# blog is optional
# TODO: check for rdf:type rss:channel
feed = model.get_target(statement.object,rdfs.seeAlso)
if feed:
feed = str(feed.uri)
if not config.has_section(feed):
config.add_section(feed)
config.set(feed, 'name', "%s (%s)" % (title, servicetitle))
config.set(feed, 'name', str(title))
# now look for OnlineAccounts for the same person
if accounts.keys():
for statement in model.find_statements(Statement(person,foaf.holdsAccount,None)):
rdfaccthome = model.get_target(statement.object,foaf.accountServiceHomepage)
rdfacctname = model.get_target(statement.object,foaf.accountName)
if not rdfaccthome or not rdfacctname: continue
if not rdfaccthome.is_resource() or not accounts.has_key(str(rdfaccthome.uri)): continue
if not rdfacctname.is_literal(): continue
rdfacctname = rdfacctname.literal_value['string']
rdfaccthome = str(rdfaccthome.uri)
# shorten feed title a bit
try:
servicetitle = rdfaccthome.replace('http://','').split('/')[0]
except:
servicetitle = rdfaccthome
feed = accounts[rdfaccthome].replace("{foaf:accountName}", rdfacctname)
if not config.has_section(feed):
config.add_section(feed)
config.set(feed, 'name', "%s (%s)" % (title, servicetitle))
if depth > 0:
# now the fun part, let's go after more friends
for statement in model.find_statements(Statement(person,foaf.knows,None)):
friend = statement.object
# let's be safe
if friend.is_literal(): continue
seeAlso = model.get_target(friend,rdfs.seeAlso)
# nothing to see
if not seeAlso or not seeAlso.is_resource(): continue
seeAlso = str(seeAlso.uri)
if not config.has_section(seeAlso):
config.add_section(seeAlso)
config.set(seeAlso, 'content_type', 'foaf')
config.set(seeAlso, 'depth', str(depth - 1))
try:
import planet
planet.downloadReadingList(seeAlso, config,
lambda data, subconfig : friend2config(model, friend, seeAlso, subconfig, data),
False)
except:
pass
return
def friend2config(friend_model, friend, seeAlso, subconfig, data):
    """ add the feeds of one friend, found in the friend's own FOAF data

    friend_model -- model in which the friend was discovered
    friend       -- the node describing the friend inside friend_model
    seeAlso      -- location of the friend's FOAF data; used as base URI
    subconfig    -- ConfigParser that receives the friend's feeds
    data         -- the friend's FOAF data, in any form load_model accepts

    Each foaf:Person in data is compared with friend on the FOAF inverse
    functional properties; the first person sharing a value is passed to
    foaf2config and the search stops.  Returns None, and does nothing at
    all if the RDF bindings cannot be imported.
    """
    try:
        from RDF import NS, Statement
    except ImportError:
        # the RDF bindings are optional
        return

    foaf = NS('http://xmlns.com/foaf/0.1/')
    rdf = NS('http://www.w3.org/1999/02/22-rdf-syntax-ns#')

    # FOAF InverseFunctionalProperties
    ifps = [foaf.mbox, foaf.mbox_sha1sum, foaf.jabberID, foaf.aimChatID,
        foaf.icqChatID, foaf.yahooChatID, foaf.msnChatID, foaf.homepage,
        foaf.weblog]

    model = load_model(data, seeAlso)

    for statement in model.find_statements(Statement(None, rdf.type, foaf.Person)):
        samefriend = statement.subject

        # maybe they have the same uri
        if friend.is_resource() and samefriend.is_resource():
            # TODO
            pass

        # same person if any identifying property has the same value
        for ifp in ifps:
            candidate = model.get_target(samefriend, ifp)
            if candidate and candidate == friend_model.get_target(friend, ifp):
                foaf2config(model, subconfig, samefriend)
                return
if __name__ == "__main__":
import sys, urllib
config = ConfigParser()

View File

@ -12,167 +12,44 @@
</foaf:PersonalProfileDocument>
<foaf:Person rdf:about="http://torrez.us/who#elias">
<foaf:name>Elias Torres</foaf:name>
<foaf:givenname>Elias</foaf:givenname>
<foaf:family_name>Torres</foaf:family_name>
<foaf:nick>EliasT</foaf:nick>
<foaf:mbox_sha1sum>171e69034f0c1563ed13d66d4abaa8b5d70e4e57</foaf:mbox_sha1sum>
<foaf:homepage rdf:resource="http://torrez.us/"/>
<foaf:workplaceHomepage rdf:resource="http://www.ibm.com/"/>
<foaf:schoolHomepage rdf:resource="http://www.usf.edu/"/>
<foaf:schoolHomepage rdf:resource="http://www.harvard.edu/"/>
<foaf:aimChatID>rico811</foaf:aimChatID>
<foaf:msnChatID>elias_torres@hotmail.com</foaf:msnChatID>
<foaf:knows>
<foaf:Person>
<foaf:name>Dan Smith</foaf:name>
<rdfs:seeAlso rdf:resource="http://www.sirpheon.com/foaf.rdf"/>
<foaf:mbox_sha1sum>bd4506f3e280442e626b7123d65dc6d7af348906</foaf:mbox_sha1sum>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person rdf:about="http://thefigtrees.net/lee/ldf-card#LDF">
<foaf:name>Lee Feigenbaum</foaf:name>
<foaf:homepage rdf:resource="http://thefigtrees.net/"/>
<foaf:mbox_sha1sum>15b51eca0082d66bee850ce4774ff2d9921c1f08</foaf:mbox_sha1sum>
<rdfs:seeAlso rdf:resource="http://thefigtrees.net/lee/ldf-card"/>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Dan Connolly</foaf:name>
<foaf:homepage rdf:resource="http://www.w3.org/People/Connolly/"/>
<foaf:mbox rdf:resource="mailto:connolly@w3.org" />
<rdfs:seeAlso>
<ical:Vcalendar rdf:about="http://www.w3.org/People/Connolly/home-smart.rdf"/>
</rdfs:seeAlso>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>James Snell</foaf:name>
<foaf:homepage rdf:resource="http://snellspace.com/"/>
<foaf:mbox rdf:resource="mailto:jasnell@us.ibm.com" />
<rdfs:seeAlso rdf:resource="ldf-card.foaf"/>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Sam Ruby</foaf:name>
<foaf:mbox_sha1sum>703471c6f39094d88665d24ce72c42fdc5f20585</foaf:mbox_sha1sum>
<rdfs:seeAlso rdf:resource="rubys-card.foaf"/>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Mark Pilgrim</foaf:name>
<foaf:mbox_sha1sum>85d089d9dc87139d5542aa4ee2822bf65e56b55e</foaf:mbox_sha1sum>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Eric Miller</foaf:name>
<foaf:mbox_sha1sum>fd1c7fa497930b8b24e3998927fcebe63509ef20</foaf:mbox_sha1sum>
<foaf:homepage rdf:resource="http://purl.org/net/eric/"/>
<foaf:workplaceHomepage rdf:resource="http://www.w3.org/People/EM/"/>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Libby Miller</foaf:name>
<foaf:mbox_sha1sum>80f78952ba7c71bc9a0a38de415692922a3f6024</foaf:mbox_sha1sum>
<foaf:mbox_sha1sum>289d4d44325d0b0218edc856c8c3904fa3fd2875</foaf:mbox_sha1sum>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Dave Beckett</foaf:name>
<foaf:mbox_sha1sum>970987f991961f2553a1bf2574166fa29befbccb</foaf:mbox_sha1sum>
<rdfs:seeAlso rdf:resource="http://www.dajobe.org/foaf.rdf"/>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Kendall Grant Clark</foaf:name>
<foaf:mbox rdf:resource="mailto:kendall@monkeyfist.com"/>
<foaf:mbox rdf:resource="mailto:kclark@ntlug.org"/>
<rdfs:seeAlso rdf:resource="http://clark.dallas.tx.us/kendall/foaf.rdf"/>
</foaf:Person>
</foaf:knows>
<foaf:knows>
<foaf:Person>
<foaf:name>Bijan Parsia</foaf:name>
<foaf:mbox_sha1sum>f49a6854842c5fa76dc0edb8e82f8fe04fd56bc9</foaf:mbox_sha1sum>
</foaf:Person>
</foaf:knows>
<foaf:interest>
<rdf:Description rdf:about="http://dublincore.org/"
dc:title="Dublin Core Metadata Initiative" />
</foaf:interest>
<foaf:interest>
<rdf:Description rdf:about="http://www.w3.org/RDF/"
dc:title="Resource Description Framework (RDF)" />
</foaf:interest>
<foaf:interest>
<rdf:Description rdf:about="http://purl.org/rss/"
dc:title="RDF Site Summary (RSS 1.0)" />
</foaf:interest>
<foaf:interest>
<rdf:Description rdf:about="http://www.w3.org/2000/01/sw/"
dc:title="Semantic Web" />
</foaf:interest>
<foaf:interest>
<rdf:Description rdf:about="http://www.atomenabled.org/"
dc:title="Atom" />
</foaf:interest>
<rdfs:seeAlso>
<ical:Vcalendar rdf:about="http://torrez.us/elias/school.ics" />
</rdfs:seeAlso>
<foaf:weblog>
<foaf:Document rdf:about="http://torrez.us/">
<dc:title>Elias Torres</dc:title>
<rdfs:seeAlso>
<rss:channel rdf:about="http://torrez.us/feed/rdf" />
</rdfs:seeAlso>
</foaf:Document>
</foaf:weblog>
<foaf:weblog>
<foaf:Document rdf:about="http://torrez.us/">
<dc:title>Elias Torres</dc:title>
<rdfs:seeAlso>
<rss:channel rdf:about="http://torrez.us/feed/rdf">
<foaf:maker rdf:resource="http://torrez.us/who#elias"/>
<foaf:topic rdf:resource="http://www.w3.org/2001/sw/"/>
<foaf:topic rdf:resource="http://www.w3.org/RDF/"/>
</rss:channel>
</rdfs:seeAlso>
</foaf:Document>
</foaf:weblog>
<foaf:holdsAccount>
<foaf:OnlineAccount>
<foaf:accountServiceHomepage rdf:resource="http://del.icio.us/"/>
<foaf:accountName>eliast</foaf:accountName>
</foaf:OnlineAccount>
</foaf:holdsAccount>
<foaf:holdsAccount>
<foaf:OnlineAccount>
<foaf:accountServiceHomepage rdf:resource="http://flickr.com/"/>
<foaf:accountName>77366516@N00</foaf:accountName>
</foaf:OnlineAccount>
</foaf:holdsAccount>
<foaf:holdsAccount>
<foaf:OnlineAccount>
<foaf:accountServiceHomepage rdf:resource="http://del.icio.us/"/>
<foaf:accountName>eliast</foaf:accountName>
</foaf:OnlineAccount>
</foaf:holdsAccount>
<foaf:holdsAccount>
<foaf:OnlineAccount>
<foaf:accountServiceHomepage rdf:resource="http://flickr.com/"/>
<foaf:accountName>77366516@N00</foaf:accountName>
</foaf:OnlineAccount>
</foaf:holdsAccount>
</foaf:Person>
<rdf:Description rdf:about="http://rdflib.net#">
<doap:developer rdf:resource="http://torrez.us/who#elias"/>
</rdf:Description>
<!--
<foaf:Person rdf:about="http://torrez.us/who#alejandra">
<foaf:name>Alejandra Torres</foaf:name>
<foaf:mbox_sha1sum>9cb53a79c6290901979a1887ccfb422b399c530f</foaf:mbox_sha1sum>
</foaf:Person>
<foaf:Person rdf:about="http://torrez.us/who#briana">
<foaf:name>Briana Torres</foaf:name>
<foaf:mbox_sha1sum>0848c584116ae83e03e89f85698ae63151e6dc84</foaf:mbox_sha1sum>
</foaf:Person>
<foaf:Person rdf:about="http://torrez.us/who#noah">
<foaf:name>Noah Torres</foaf:name>
<foaf:mbox_sha1sum>0e2b5e875c7646a20199d751760c354b4f7cfa2b</foaf:mbox_sha1sum>
</foaf:Person>
-->
</rdf:RDF>

View File

@ -0,0 +1,7 @@
[Planet]
name = FOAF Test Configuration
cache_directory = tests/work/config/cache
[tests/data/config/eliast.foaf]
content_type = foaf
depth=1

View File

@ -0,0 +1,33 @@
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:doap="http://usefulinc.com/ns/doap#"
xmlns:ical="http://www.w3.org/2002/12/cal/icaltzd#"
xmlns:rss="http://purl.org/rss/1.0/"
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<foaf:PersonalProfileDocument rdf:about="http://torrez.us/who">
<foaf:primaryTopic rdf:resource="http://thefigtrees.net/lee/ldf-card#LDF"/>
</foaf:PersonalProfileDocument>
<foaf:Person rdf:about="http://thefigtrees.net/lee/ldf-card#LDF">
<foaf:name>Lee Feigenbaum</foaf:name>
<foaf:mbox_sha1sum>15b51eca0082d66bee850ce4774ff2d9921c1f08</foaf:mbox_sha1sum>
<foaf:weblog>
<foaf:Document rdf:about="http://thefigtrees.net/">
<dc:title>Lee Feigenbaum</dc:title>
<rdfs:seeAlso>
<rss:channel rdf:about="http://thefigtrees.net/lee/life/atom.xml" />
</rdfs:seeAlso>
</foaf:Document>
</foaf:weblog>
<foaf:holdsAccount>
<foaf:OnlineAccount>
<foaf:accountServiceHomepage rdf:resource="http://del.icio.us/"/>
<foaf:accountName>leef</foaf:accountName>
</foaf:OnlineAccount>
</foaf:holdsAccount>
</foaf:Person>
</rdf:RDF>

View File

@ -0,0 +1,33 @@
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:doap="http://usefulinc.com/ns/doap#"
xmlns:ical="http://www.w3.org/2002/12/cal/icaltzd#"
xmlns:rss="http://purl.org/rss/1.0/"
xmlns:foaf="http://xmlns.com/foaf/0.1/">
<foaf:PersonalProfileDocument rdf:about="http://torrez.us/who">
<foaf:primaryTopic rdf:resource="#sam"/>
</foaf:PersonalProfileDocument>
<foaf:Person rdf:about="#sam">
<foaf:name>Sam Ruby</foaf:name>
<foaf:mbox_sha1sum>703471c6f39094d88665d24ce72c42fdc5f20585</foaf:mbox_sha1sum>
<foaf:weblog>
<foaf:Document rdf:about="http://intertwingly.net/blog/">
<dc:title>Its just data.</dc:title>
<rdfs:seeAlso>
<rss:channel rdf:about="http://intertwingly.net/blog/atom.xml" />
</rdfs:seeAlso>
</foaf:Document>
</foaf:weblog>
<foaf:holdsAccount>
<foaf:OnlineAccount>
<foaf:accountServiceHomepage rdf:resource="http://del.icio.us/"/>
<foaf:accountName>rubys</foaf:accountName>
</foaf:OnlineAccount>
</foaf:holdsAccount>
</foaf:Person>
</rdf:RDF>

View File

@ -105,6 +105,13 @@ class FoafTest(unittest.TestCase):
'http://del.icio.us/rss/eliast',
'http://torrez.us/feed/rdf'], feeds)
def test_recursive(self):
    # Load the foaf-deep configuration and check the complete, sorted set of
    # subscriptions (presumably the extra feeds come from following
    # foaf:knows one level deep -- confirm against foaf-deep.ini).
    config.load('tests/data/config/foaf-deep.ini')

    expected = [
        'http://intertwingly.net/blog/atom.xml',
        'http://thefigtrees.net/lee/life/atom.xml',
        'http://torrez.us/feed/rdf',
    ]

    found = config.subscriptions()
    found.sort()
    self.assertEqual(expected, found)
# these tests only make sense if libRDF is installed
try: