add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run

Matt Domsch 2010-06-15 11:21:41 -05:00 committed by Sam Ruby
parent 1f5c9ce848
commit fdb91411dc
6 changed files with 114 additions and 0 deletions
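For context: the "publish" step added at the end of each run is a PubSubHubbub ping, i.e. an HTTP POST to the hub carrying hub.mode=publish plus one hub.url parameter per feed URL, which is the request the vendored library below constructs. A minimal standalone sketch of such a ping (Python 2, to match this commit; the hub and feed URLs are placeholders, not values from this repository):

# Sketch of a PubSubHubbub publish ping; hub and feed URLs are placeholders.
import sys
import urllib
import urllib2

hub = 'http://pubsubhubbub.appspot.com'
feeds = ['http://planet.example.org/atom.xml',
         'http://planet.example.org/rss20.xml']

# doseq=True expands the list into one hub.url parameter per feed
data = urllib.urlencode({'hub.mode': 'publish', 'hub.url': feeds}, doseq=True)
try:
    urllib2.urlopen(hub, data)  # the hub answers 204 No Content on success
except (IOError, urllib2.HTTPError), e:
    print >>sys.stderr, "publish ping failed:", e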


@@ -23,6 +23,7 @@ if __name__ == "__main__":
    only_if_new = 0
    expunge = 0
    debug_splice = 0
    no_publish = 0

    for arg in sys.argv[1:]:
        if arg == "-h" or arg == "--help":
@@ -34,6 +35,7 @@
            print " -h, --help Display this help message and exit"
            print " -n, --only-if-new Only spider new feeds"
            print " -x, --expunge Expunge old entries from cache"
            print " --no-publish Do not publish feeds using PubSubHubbub"
            print
            sys.exit(0)
        elif arg == "-v" or arg == "--verbose":
@@ -46,6 +48,8 @@
            expunge = 1
        elif arg == "-d" or arg == "--debug-splice":
            debug_splice = 1
        elif arg == "--no-publish":
            no_publish = 1
        elif arg.startswith("-"):
            print >>sys.stderr, "Unknown option:", arg
            sys.exit(1)
@@ -84,6 +88,10 @@
    splice.apply(doc.toxml('utf-8'))

    if not no_publish:
        from planet import publish
        publish.publish(config)

    if expunge:
        from planet import expunge
        expunge.expungeCache()


@@ -38,3 +38,5 @@ sys.path.insert(1, os.path.join(os.path.dirname(__file__),'vendor'))
import feedparser
feedparser.SANITIZE_HTML=1
feedparser.RESOLVE_RELATIVE_URIS=0
import publish

planet/publish.py

@@ -0,0 +1,15 @@
import os, sys
import urlparse
import pubsubhubbub_publisher as PuSH

def publish(config):
    hub = config.pubsubhubbub_hub()
    link = config.link()
    if hub and link:
        for root, dirs, files in os.walk(config.output_dir()):
            xmlfiles = [urlparse.urljoin(link, f) for f in files if f.endswith('.xml')]
            try:
                PuSH.publish(hub, xmlfiles)
            except PuSH.PublishError, e:
                sys.stderr.write("PubSubHubbub publishing error: %s\n" % e)
            break


@@ -0,0 +1,10 @@
Metadata-Version: 1.0
Name: PubSubHubbub_Publisher
Version: 1.0
Summary: Publisher client for PubSubHubbub
Home-page: http://code.google.com/p/pubsubhubbub/
Author: Brett Slatkin
Author-email: bslatkin@gmail.com
License: Apache 2.0
Description: A simple, open, server-to-server web-hook-based pubsub (publish/subscribe) protocol as a simple extension to Atom. Parties (servers) speaking the PubSubHubbub protocol can get near-instant notifications (via webhook callbacks) when a topic (Atom URL) they're interested in is updated.
Platform: UNKNOWN


@@ -0,0 +1,2 @@
from pubsubhubbub_publish import *


@@ -0,0 +1,77 @@
#!/usr/bin/env python
#
# Copyright 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Simple Publisher client for PubSubHubbub.
Example usage:
from pubsubhubbub_publish import *
try:
publish('http://pubsubhubbub.appspot.com',
'http://example.com/feed1/atom.xml',
'http://example.com/feed2/atom.xml',
'http://example.com/feed3/atom.xml')
except PublishError, e:
# handle exception...
Set the 'http_proxy' environment variable on *nix or Windows to use an
HTTP proxy.
"""
__author__ = 'bslatkin@gmail.com (Brett Slatkin)'

import urllib
import urllib2


class PublishError(Exception):
  """An error occurred while trying to publish to the hub."""


URL_BATCH_SIZE = 100


def publish(hub, *urls):
  """Publishes an event to a hub.

  Args:
    hub: The hub to publish the event to.
    **urls: One or more URLs to publish to. If only a single URL argument is
      passed and that item is an iterable that is not a string, the contents of
      that iterable will be used to produce the list of published URLs. If
      more than URL_BATCH_SIZE URLs are supplied, this function will batch them
      into chunks across multiple requests.

  Raises:
    PublishError if anything went wrong during publishing.
  """
  if len(urls) == 1 and not isinstance(urls[0], basestring):
    urls = list(urls[0])

  for i in xrange(0, len(urls), URL_BATCH_SIZE):
    chunk = urls[i:i+URL_BATCH_SIZE]
    data = urllib.urlencode(
        {'hub.url': chunk, 'hub.mode': 'publish'}, doseq=True)
    try:
      response = urllib2.urlopen(hub, data)
    except (IOError, urllib2.HTTPError), e:
      if hasattr(e, 'code') and e.code == 204:
        continue
      error = ''
      if hasattr(e, 'read'):
        error = e.read()
      raise PublishError('%s, Response: "%s"' % (e, error))
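As the docstring above notes, publish() also accepts a single non-string iterable of URLs, which is how the new planet/publish.py drives it. A short usage sketch of the vendored package (assuming planet/vendor is on sys.path, as planet/__init__.py arranges; the hub and feed URLs are placeholders):

# Usage sketch for the vendored publisher; hub and feed URLs are placeholders.
import sys
import pubsubhubbub_publisher as PuSH

feeds = ['http://planet.example.org/atom.xml',
         'http://planet.example.org/rss20.xml']
try:
    # a single list argument is expanded into the set of published URLs
    PuSH.publish('http://pubsubhubbub.appspot.com', feeds)
except PuSH.PublishError, e:
    sys.stderr.write("PubSubHubbub publishing error: %s\n" % e)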