diff --git a/planet/__init__.py b/planet/__init__.py index 9babf19..e24473c 100644 --- a/planet/__init__.py +++ b/planet/__init__.py @@ -2,9 +2,13 @@ xmlns = 'http://planet.intertwingly.net/' logger = None +import os, sys, re import config config.__init__() +from ConfigParser import ConfigParser +from urlparse import urljoin + def getLogger(level): """ get a logger with the specified log level """ global logger @@ -48,3 +52,85 @@ def setTimeout(timeout): logger.info("Socket timeout set to %d seconds", timeout) else: logger.error("Unable to set timeout to %d seconds", timeout) + +def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True): + global logger + try: + + import urllib2, StringIO + from planet.spider import filename + + # list cache file name + cache_filename = filename(config.cache_lists_directory(), list) + + # retrieve list options (e.g., etag, last-modified) from cache + options = {} + + # add original options + for key, value in orig_config.items(list): + options[key] = value + + try: + if use_cache: + cached_config = ConfigParser() + cached_config.read(cache_filename) + for option in cached_config.options(list): + options[option] = cached_config.get(list,option) + except: + pass + + cached_config = ConfigParser() + cached_config.add_section(list) + for key, value in options.items(): + cached_config.set(list, key, value) + + # read list + base = urljoin('file:', os.path.abspath(os.path.curdir)) + request = urllib2.Request(urljoin(base + '/', list)) + if options.has_key("etag"): + request.add_header('If-None-Match', options['etag']) + if options.has_key("last-modified"): + request.add_header('If-Modified-Since', + options['last-modified']) + response = urllib2.urlopen(request) + if response.headers.has_key('etag'): + cached_config.set(list, 'etag', response.headers['etag']) + if response.headers.has_key('last-modified'): + cached_config.set(list, 'last-modified', + response.headers['last-modified']) + + # convert to config.ini + data = StringIO.StringIO(response.read()) + + if callback: callback(data, cached_config) + + # write to cache + if use_cache: + cache = open(cache_filename, 'w') + cached_config.write(cache) + cache.close() + + # re-parse and proceed + logger.debug("Using %s readinglist", list) + if re_read: + if use_cache: + orig_config.read(cache_filename) + else: + cdata = StringIO.StringIO() + cached_config.write(cdata) + cdata.seek(0) + orig_config.readfp(cdata) + except: + try: + if re_read: + if use_cache: + orig_config.read(cache_filename) + else: + cdata = StringIO.StringIO() + cached_config.write(cdata) + cdata.seek(0) + orig_config.readfp(cdata) + logger.info("Using cached %s readinglist", list) + except: + logger.exception("Unable to read %s readinglist", list) + diff --git a/planet/config.py b/planet/config.py index 312c1f0..b3306de 100644 --- a/planet/config.py +++ b/planet/config.py @@ -169,70 +169,17 @@ def load(config_file): if reading_lists: if not os.path.exists(config.cache_lists_directory()): os.makedirs(config.cache_lists_directory()) - from planet.spider import filename - for list in reading_lists: - cache_filename = filename(config.cache_lists_directory(), list) - try: - import urllib2, StringIO - # retrieve list options (e.g., etag, last-modified) from cache - options = {} - - # add original options - for key, value in parser.items(list): - options[key] = value - - try: - cached_config = ConfigParser() - cached_config.read(cache_filename) - for option in cached_config.options(list): - options[option] = cached_config.get(list,option) - except: - pass - cached_config = ConfigParser() - cached_config.add_section(list) - for key, value in options.items(): - cached_config.set(list, key, value) - - # read list - base = urljoin('file:', os.path.abspath(os.path.curdir)) - request = urllib2.Request(urljoin(base + '/', list)) - if options.has_key("etag"): - request.add_header('If-None-Match', options['etag']) - if options.has_key("last-modified"): - request.add_header('If-Modified-Since', - options['last-modified']) - response = urllib2.urlopen(request) - if response.headers.has_key('etag'): - cached_config.set(list, 'etag', response.headers['etag']) - if response.headers.has_key('last-modified'): - cached_config.set(list, 'last-modified', - response.headers['last-modified']) - - # convert to config.ini - data=StringIO.StringIO(response.read()) + def data2config(data, cached_config): if content_type(list).find('opml')>=0: opml.opml2config(data, cached_config) elif content_type(list).find('foaf')>=0: foaf.foaf2config(data, cached_config) - if cached_config.sections() in [[], [list]]: raise Exception + if cached_config.sections() in [[], [list]]: + raise Exception - # write to cache - cache = open(cache_filename, 'w') - cached_config.write(cache) - cache.close() - - # re-parse and proceed - log.debug("Using %s readinglist", list) - data.seek(0) - parser.read(cache_filename) - except: - try: - parser.read(cache_filename) - log.info("Using cached %s readinglist", list) - except: - log.exception("Unable to read %s readinglist", list) - continue + for list in reading_lists: + planet.downloadReadingList(list, parser, data2config) def cache_sources_directory(): if parser.has_option('Planet', 'cache_sources_directory'): diff --git a/planet/foaf.py b/planet/foaf.py index 9a0e5b5..5f97d3c 100644 --- a/planet/foaf.py +++ b/planet/foaf.py @@ -1,24 +1,6 @@ from ConfigParser import ConfigParser -# input = foaf, output = ConfigParser -def foaf2config(rdf, config=None): - - if not config or not config.sections(): - config = ConfigParser() - - section = config.sections().pop() - - try: - from RDF import Model, NS, Parser, Statement - except: - return config - - if hasattr(rdf, 'read'): - rdf = rdf.read() - - # account mappings, none by default - # form: accounts = {url to service homepage (as found in FOAF)}|{URI template}\n* - # example: http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName} +def load_accounts(config, section): accounts = {} if(config.has_option(section, 'online_accounts')): values = config.get(section, 'online_accounts') @@ -29,62 +11,166 @@ def foaf2config(rdf, config=None): except: pass - model = Model() + return accounts + +def load_model(rdf, base_uri): + + if hasattr(rdf, 'find_statements'): + return rdf + + if hasattr(rdf, 'read'): + rdf = rdf.read() + def handler(code, level, facility, message, line, column, byte, file, uri): pass - Parser().parse_string_into_model(model,rdf,section,handler) + + from RDF import Model, Parser + + model = Model() + + Parser().parse_string_into_model(model,rdf,base_uri,handler) + + return model + +# input = foaf, output = ConfigParser +def foaf2config(rdf, config, subject=None): + + if not config or not config.sections(): + return + + # there should be only be 1 section + section = config.sections().pop() + + try: + from RDF import Model, NS, Parser, Statement + except: + return + + # account mappings, none by default + # form: accounts = {url to service homepage (as found in FOAF)}|{URI template}\n* + # example: http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName} + accounts = load_accounts(config, section) + + depth = 0 + + if(config.has_option(section, 'depth')): + depth = config.getint(section, 'depth') + + model = load_model(rdf, section) dc = NS('http://purl.org/dc/elements/1.1/') foaf = NS('http://xmlns.com/foaf/0.1/') rdfs = NS('http://www.w3.org/2000/01/rdf-schema#') - for statement in model.find_statements(Statement(None,foaf.weblog,None)): + for statement in model.find_statements(Statement(subject,foaf.weblog,None)): # feed owner person = statement.subject - - feed = model.get_target(statement.object,rdfs.seeAlso) - if not feed: continue + # title is required (at the moment) title = model.get_target(person,foaf.name) if not title: title = model.get_target(statement.object,dc.title) - if not title: continue - - feed = str(feed.uri) - if not config.has_section(feed): - config.add_section(feed) - config.set(feed, 'name', str(title)) - - # if we don't have mappings, we're done - if not accounts.keys(): + if not title: continue - # now look for OnlineAccounts for the same person - for statement in model.find_statements(Statement(person,foaf.holdsAccount,None)): - rdfaccthome = model.get_target(statement.object,foaf.accountServiceHomepage) - rdfacctname = model.get_target(statement.object,foaf.accountName) - - if not rdfaccthome or not rdfacctname: continue - - if not rdfaccthome.is_resource() or not accounts.has_key(str(rdfaccthome.uri)): continue - - if not rdfacctname.is_literal(): continue - - rdfacctname = rdfacctname.literal_value['string'] - rdfaccthome = str(rdfaccthome.uri) - - # shorten feed title a bit - try: - servicetitle = rdfaccthome.replace('http://','').split('/')[0] - except: - servicetitle = rdfaccthome - - feed = accounts[rdfaccthome].replace("{foaf:accountName}", rdfacctname) + # blog is optional + # TODO: check for rdf:type rss:channel + feed = model.get_target(statement.object,rdfs.seeAlso) + if feed: + feed = str(feed.uri) if not config.has_section(feed): config.add_section(feed) - config.set(feed, 'name', "%s (%s)" % (title, servicetitle)) + config.set(feed, 'name', str(title)) - return config + # now look for OnlineAccounts for the same person + if accounts.keys(): + for statement in model.find_statements(Statement(person,foaf.holdsAccount,None)): + rdfaccthome = model.get_target(statement.object,foaf.accountServiceHomepage) + rdfacctname = model.get_target(statement.object,foaf.accountName) + + if not rdfaccthome or not rdfacctname: continue + + if not rdfaccthome.is_resource() or not accounts.has_key(str(rdfaccthome.uri)): continue + + if not rdfacctname.is_literal(): continue + + rdfacctname = rdfacctname.literal_value['string'] + rdfaccthome = str(rdfaccthome.uri) + + # shorten feed title a bit + try: + servicetitle = rdfaccthome.replace('http://','').split('/')[0] + except: + servicetitle = rdfaccthome + + feed = accounts[rdfaccthome].replace("{foaf:accountName}", rdfacctname) + if not config.has_section(feed): + config.add_section(feed) + config.set(feed, 'name', "%s (%s)" % (title, servicetitle)) + + if depth > 0: + + # now the fun part, let's go after more friends + for statement in model.find_statements(Statement(person,foaf.knows,None)): + friend = statement.object + + # let's be safe + if friend.is_literal(): continue + + seeAlso = model.get_target(friend,rdfs.seeAlso) + + # nothing to see + if not seeAlso or not seeAlso.is_resource(): continue + + seeAlso = str(seeAlso.uri) + + if not config.has_section(seeAlso): + config.add_section(seeAlso) + config.set(seeAlso, 'content_type', 'foaf') + config.set(seeAlso, 'depth', str(depth - 1)) + + try: + import planet + planet.downloadReadingList(seeAlso, config, + lambda data, subconfig : friend2config(model, friend, seeAlso, subconfig, data), + False) + except: + pass + + return + +def friend2config(friend_model, friend, seeAlso, subconfig, data): + + try: + from RDF import Model, NS, Parser, Statement + except: + return + + dc = NS('http://purl.org/dc/elements/1.1/') + foaf = NS('http://xmlns.com/foaf/0.1/') + rdf = NS('http://www.w3.org/1999/02/22-rdf-syntax-ns#') + rdfs = NS('http://www.w3.org/2000/01/rdf-schema#') + + # FOAF InverseFunctionalProperties + ifps = [foaf.mbox, foaf.mbox_sha1sum, foaf.jabberID, foaf.aimChatID, + foaf.icqChatID, foaf.yahooChatID, foaf.msnChatID, foaf.homepage, foaf.weblog] + + model = load_model(data, seeAlso) + + for statement in model.find_statements(Statement(None,rdf.type,foaf.Person)): + + samefriend = statement.subject + + # maybe they have the same uri + if friend.is_resource() and samefriend.is_resource(): + # TODO + pass + + for ifp in ifps: + object = model.get_target(samefriend,ifp) + if object and object == friend_model.get_target(friend, ifp): + foaf2config(model, subconfig, samefriend) + return if __name__ == "__main__": import sys, urllib diff --git a/tests/data/config/another.foaf b/tests/data/config/another.foaf new file mode 100644 index 0000000..2e5bbb5 --- /dev/null +++ b/tests/data/config/another.foaf @@ -0,0 +1,38 @@ + + + + + + + Another Elias Torres + + + + Elias Torres + + + + + + + + + + + SOMEID + + + + + + SOMEID + + + + + diff --git a/tests/data/config/eliast.foaf b/tests/data/config/eliast.foaf index a9e2f7a..1c56f08 100644 --- a/tests/data/config/eliast.foaf +++ b/tests/data/config/eliast.foaf @@ -12,167 +12,44 @@ Elias Torres - Elias - Torres - EliasT - 171e69034f0c1563ed13d66d4abaa8b5d70e4e57 - - - - - rico811 - elias_torres@hotmail.com - - - Dan Smith - - bd4506f3e280442e626b7123d65dc6d7af348906 - - Lee Feigenbaum 15b51eca0082d66bee850ce4774ff2d9921c1f08 - - - - - - Dan Connolly - - - - - - - - - - James Snell - - + Sam Ruby 703471c6f39094d88665d24ce72c42fdc5f20585 + - - - Mark Pilgrim - 85d089d9dc87139d5542aa4ee2822bf65e56b55e - - - - - Eric Miller - fd1c7fa497930b8b24e3998927fcebe63509ef20 - - - - - - - Libby Miller - 80f78952ba7c71bc9a0a38de415692922a3f6024 - 289d4d44325d0b0218edc856c8c3904fa3fd2875 - - - - - Dave Beckett - 970987f991961f2553a1bf2574166fa29befbccb - - - - - - Kendall Grant Clark - - - - - - - - Bijan Parsia - f49a6854842c5fa76dc0edb8e82f8fe04fd56bc9 - - - - - - - - - - - - - - - - - - - - - + + + Elias Torres + + + + + - - - Elias Torres - - - - - - - - - + + + + eliast + + + + + + 77366516@N00 + + - - - - eliast - - - - - - 77366516@N00 - - - - - - - - - diff --git a/tests/data/config/foaf-deep.ini b/tests/data/config/foaf-deep.ini new file mode 100644 index 0000000..a38e409 --- /dev/null +++ b/tests/data/config/foaf-deep.ini @@ -0,0 +1,7 @@ +[Planet] +name = FOAF Test Configuration +cache_directory = tests/work/config/cache + +[tests/data/config/eliast.foaf] +content_type = foaf +depth=1 diff --git a/tests/data/config/foaf-multiple.ini b/tests/data/config/foaf-multiple.ini new file mode 100644 index 0000000..3272baf --- /dev/null +++ b/tests/data/config/foaf-multiple.ini @@ -0,0 +1,18 @@ +[Planet] +name = FOAF Test Configuration +cache_directory = tests/work/config/cache + +[tests/data/config/eliast.foaf] +content_type = foaf +random_setting = eliast +online_accounts = + http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName} + http://flickr.com/|http://api.flickr.com/services/feeds/photos_public.gne?id={foaf:accountName} + +[tests/data/config/another.foaf] +content_type = foaf +random_setting = another +online_accounts = + http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName} + http://flickr.com/|http://api.flickr.com/services/feeds/photos_public.gne?id={foaf:accountName} + diff --git a/tests/data/config/foaf.ini b/tests/data/config/foaf.ini index db7a7fb..281a995 100644 --- a/tests/data/config/foaf.ini +++ b/tests/data/config/foaf.ini @@ -7,4 +7,3 @@ content_type = foaf online_accounts = http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName} http://flickr.com/|http://api.flickr.com/services/feeds/photos_public.gne?id={foaf:accountName} - diff --git a/tests/data/config/ldf-card.foaf b/tests/data/config/ldf-card.foaf new file mode 100644 index 0000000..50a0855 --- /dev/null +++ b/tests/data/config/ldf-card.foaf @@ -0,0 +1,33 @@ + + + + + + Lee Feigenbaum + 15b51eca0082d66bee850ce4774ff2d9921c1f08 + + + + Lee Feigenbaum + + + + + + + + + + leef + + + + + diff --git a/tests/data/config/rubys-card.foaf b/tests/data/config/rubys-card.foaf new file mode 100644 index 0000000..7bab6ac --- /dev/null +++ b/tests/data/config/rubys-card.foaf @@ -0,0 +1,33 @@ + + + + + + Sam Ruby + 703471c6f39094d88665d24ce72c42fdc5f20585 + + + + Its just data. + + + + + + + + + + rubys + + + + + diff --git a/tests/test_foaf.py b/tests/test_foaf.py index adc2022..4d9b355 100644 --- a/tests/test_foaf.py +++ b/tests/test_foaf.py @@ -91,6 +91,28 @@ class FoafTest(unittest.TestCase): 'http://del.icio.us/rss/eliast', 'http://torrez.us/feed/rdf'], feeds) + def test_multiple_subscriptions(self): + config.load('tests/data/config/foaf-multiple.ini') + self.assertEqual(2,len(config.reading_lists())) + feeds = config.subscriptions() + feeds.sort() + self.assertEqual(5,len(feeds)) + self.assertEqual(['http://api.flickr.com/services/feeds/' + + 'photos_public.gne?id=77366516@N00', + 'http://api.flickr.com/services/feeds/' + + 'photos_public.gne?id=SOMEID', + 'http://del.icio.us/rss/SOMEID', + 'http://del.icio.us/rss/eliast', + 'http://torrez.us/feed/rdf'], feeds) + + def test_recursive(self): + config.load('tests/data/config/foaf-deep.ini') + feeds = config.subscriptions() + feeds.sort() + self.assertEqual(['http://intertwingly.net/blog/atom.xml', + 'http://thefigtrees.net/lee/life/atom.xml', + 'http://torrez.us/feed/rdf'], feeds) + # these tests only make sense if libRDF is installed try: import RDF