add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run

Matt Domsch 2010-06-15 11:21:41 -05:00 committed by Sam Ruby
parent 1f5c9ce848
commit fdb91411dc
6 changed files with 114 additions and 0 deletions
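For context: the "publish" step added at the end of each run is a PubSubHubbub ping, i.e. an HTTP POST to the hub carrying hub.mode=publish plus one hub.url parameter per feed URL, which is the request the vendored library below constructs. A minimal standalone sketch of such a ping (Python 2, to match this commit; the hub and feed URLs are placeholders, not values from this repository):

# Sketch of a PubSubHubbub publish ping; hub and feed URLs are placeholders.
import sys
import urllib
import urllib2

hub = 'http://pubsubhubbub.appspot.com'
feeds = ['http://planet.example.org/atom.xml',
         'http://planet.example.org/rss20.xml']

# doseq=True expands the list into one hub.url parameter per feed
data = urllib.urlencode({'hub.mode': 'publish', 'hub.url': feeds}, doseq=True)
try:
    urllib2.urlopen(hub, data)  # the hub answers 204 No Content on success
except (IOError, urllib2.HTTPError), e:
    print >>sys.stderr, "publish ping failed:", e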


@@ -23,6 +23,7 @@ if __name__ == "__main__":
    only_if_new = 0
    expunge = 0
    debug_splice = 0
    no_publish = 0

    for arg in sys.argv[1:]:
        if arg == "-h" or arg == "--help":
@@ -34,6 +35,7 @@
            print " -h, --help Display this help message and exit"
            print " -n, --only-if-new Only spider new feeds"
            print " -x, --expunge Expunge old entries from cache"
            print " --no-publish Do not publish feeds using PubSubHubbub"
            print
            sys.exit(0)
        elif arg == "-v" or arg == "--verbose":
@@ -46,6 +48,8 @@
            expunge = 1
        elif arg == "-d" or arg == "--debug-splice":
            debug_splice = 1
        elif arg == "--no-publish":
            no_publish = 1
        elif arg.startswith("-"):
            print >>sys.stderr, "Unknown option:", arg
            sys.exit(1)
@@ -84,6 +88,10 @@
    splice.apply(doc.toxml('utf-8'))

    if not no_publish:
        from planet import publish
        publish.publish(config)

    if expunge:
        from planet import expunge
        expunge.expungeCache()


@@ -38,3 +38,5 @@ sys.path.insert(1, os.path.join(os.path.dirname(__file__),'vendor'))
import feedparser
feedparser.SANITIZE_HTML=1
feedparser.RESOLVE_RELATIVE_URIS=0
import publish

planet/publish.py

@@ -0,0 +1,15 @@
import os, sys
import urlparse
import pubsubhubbub_publisher as PuSH

def publish(config):
    hub = config.pubsubhubbub_hub()
    link = config.link()
    if hub and link:
        for root, dirs, files in os.walk(config.output_dir()):
            xmlfiles = [urlparse.urljoin(link, f) for f in files if f.endswith('.xml')]
            try:
                PuSH.publish(hub, xmlfiles)
            except PuSH.PublishError, e:
                sys.stderr.write("PubSubHubbub publishing error: %s\n" % e)
            break


@@ -0,0 +1,10 @@
Metadata-Version: 1.0
Name: PubSubHubbub_Publisher
Version: 1.0
Summary: Publisher client for PubSubHubbub
Home-page: http://code.google.com/p/pubsubhubbub/
Author: Brett Slatkin
Author-email: bslatkin@gmail.com
License: Apache 2.0
Description: A simple, open, server-to-server web-hook-based pubsub (publish/subscribe) protocol as a simple extension to Atom. Parties (servers) speaking the PubSubHubbub protocol can get near-instant notifications (via webhook callbacks) when a topic (Atom URL) they're interested in is updated.
Platform: UNKNOWN


@@ -0,0 +1,2 @@
from pubsubhubbub_publish import *


@@ -0,0 +1,77 @@
#!/usr/bin/env python
#
# Copyright 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Simple Publisher client for PubSubHubbub.
Example usage:
from pubsubhubbub_publish import *
try:
publish('http://pubsubhubbub.appspot.com',
'http://example.com/feed1/atom.xml',
'http://example.com/feed2/atom.xml',
'http://example.com/feed3/atom.xml')
except PublishError, e:
# handle exception...
Set the 'http_proxy' environment variable on *nix or Windows to use an
HTTP proxy.
"""
__author__ = 'bslatkin@gmail.com (Brett Slatkin)'

import urllib
import urllib2


class PublishError(Exception):
  """An error occurred while trying to publish to the hub."""


URL_BATCH_SIZE = 100


def publish(hub, *urls):
  """Publishes an event to a hub.

  Args:
    hub: The hub to publish the event to.
    **urls: One or more URLs to publish to. If only a single URL argument is
      passed and that item is an iterable that is not a string, the contents of
      that iterable will be used to produce the list of published URLs. If
      more than URL_BATCH_SIZE URLs are supplied, this function will batch them
      into chunks across multiple requests.

  Raises:
    PublishError if anything went wrong during publishing.
  """
  if len(urls) == 1 and not isinstance(urls[0], basestring):
    urls = list(urls[0])

  for i in xrange(0, len(urls), URL_BATCH_SIZE):
    chunk = urls[i:i+URL_BATCH_SIZE]
    data = urllib.urlencode(
        {'hub.url': chunk, 'hub.mode': 'publish'}, doseq=True)
    try:
      response = urllib2.urlopen(hub, data)
    except (IOError, urllib2.HTTPError), e:
      if hasattr(e, 'code') and e.code == 204:
        continue
      error = ''
      if hasattr(e, 'read'):
        error = e.read()
      raise PublishError('%s, Response: "%s"' % (e, error))
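As the docstring above notes, publish() also accepts a single non-string iterable of URLs, which is how the new planet/publish.py drives it. A short usage sketch of the vendored package (assuming planet/vendor is on sys.path, as planet/__init__.py arranges; the hub and feed URLs are placeholders):

# Usage sketch for the vendored publisher; hub and feed URLs are placeholders.
import sys
import pubsubhubbub_publisher as PuSH

feeds = ['http://planet.example.org/atom.xml',
         'http://planet.example.org/rss20.xml']
try:
    # a single list argument is expanded into the set of published URLs
    PuSH.publish('http://pubsubhubbub.appspot.com', feeds)
except PuSH.PublishError, e:
    sys.stderr.write("PubSubHubbub publishing error: %s\n" % e)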