add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run
This commit is contained in:
parent
1f5c9ce848
commit
fdb91411dc
@ -23,6 +23,7 @@ if __name__ == "__main__":
|
|||||||
only_if_new = 0
|
only_if_new = 0
|
||||||
expunge = 0
|
expunge = 0
|
||||||
debug_splice = 0
|
debug_splice = 0
|
||||||
|
no_publish = 0
|
||||||
|
|
||||||
for arg in sys.argv[1:]:
|
for arg in sys.argv[1:]:
|
||||||
if arg == "-h" or arg == "--help":
|
if arg == "-h" or arg == "--help":
|
||||||
@ -34,6 +35,7 @@ if __name__ == "__main__":
|
|||||||
print " -h, --help Display this help message and exit"
|
print " -h, --help Display this help message and exit"
|
||||||
print " -n, --only-if-new Only spider new feeds"
|
print " -n, --only-if-new Only spider new feeds"
|
||||||
print " -x, --expunge Expunge old entries from cache"
|
print " -x, --expunge Expunge old entries from cache"
|
||||||
|
print " --no-publish Do not publish feeds using PubSubHubbub"
|
||||||
print
|
print
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
elif arg == "-v" or arg == "--verbose":
|
elif arg == "-v" or arg == "--verbose":
|
||||||
@ -46,6 +48,8 @@ if __name__ == "__main__":
|
|||||||
expunge = 1
|
expunge = 1
|
||||||
elif arg == "-d" or arg == "--debug-splice":
|
elif arg == "-d" or arg == "--debug-splice":
|
||||||
debug_splice = 1
|
debug_splice = 1
|
||||||
|
elif arg == "--no-publish":
|
||||||
|
no_publish = 1
|
||||||
elif arg.startswith("-"):
|
elif arg.startswith("-"):
|
||||||
print >>sys.stderr, "Unknown option:", arg
|
print >>sys.stderr, "Unknown option:", arg
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@ -84,6 +88,10 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
splice.apply(doc.toxml('utf-8'))
|
splice.apply(doc.toxml('utf-8'))
|
||||||
|
|
||||||
|
if not no_publish:
|
||||||
|
from planet import publish
|
||||||
|
publish.publish(config)
|
||||||
|
|
||||||
if expunge:
|
if expunge:
|
||||||
from planet import expunge
|
from planet import expunge
|
||||||
expunge.expungeCache
|
expunge.expungeCache
|
||||||
|
@ -38,3 +38,5 @@ sys.path.insert(1, os.path.join(os.path.dirname(__file__),'vendor'))
|
|||||||
import feedparser
|
import feedparser
|
||||||
feedparser.SANITIZE_HTML=1
|
feedparser.SANITIZE_HTML=1
|
||||||
feedparser.RESOLVE_RELATIVE_URIS=0
|
feedparser.RESOLVE_RELATIVE_URIS=0
|
||||||
|
|
||||||
|
import publish
|
||||||
|
15
planet/publish.py
Normal file
15
planet/publish.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import os, sys
|
||||||
|
import urlparse
|
||||||
|
import pubsubhubbub_publisher as PuSH
|
||||||
|
|
||||||
|
def publish(config):
    """Announce the planet's XML feeds to the configured PubSubHubbub hub.

    The hub URL and the planet's public link are read from ``config``. When
    both are set, each ``*.xml`` file name reported for the output directory
    is joined onto the link and the resulting URL list is handed to
    ``PuSH.publish``. A ``PuSH.PublishError`` is written to stderr instead
    of propagating to the caller.
    """
    hub_url = config.pubsubhubbub_hub()
    base_link = config.link()
    if not (hub_url and base_link):
        # Publishing needs both a hub and a public link to build URLs from.
        return

    # NOTE(review): assumes the trailing `break` sits at loop level, i.e.
    # only the first directory yielded by os.walk() (the output directory
    # itself) is published -- confirm against the original indentation.
    for _root, _dirs, names in os.walk(config.output_dir()):
        feed_urls = []
        for name in names:
            if name.endswith('.xml'):
                feed_urls.append(urlparse.urljoin(base_link, name))
        try:
            PuSH.publish(hub_url, feed_urls)
        except PuSH.PublishError:
            # Fetch the active exception without version-specific syntax.
            problem = sys.exc_info()[1]
            sys.stderr.write("PubSubHubbub publishing error: %s\n" % problem)
        break
|
10
planet/vendor/pubsubhubbub_publisher/PKG-INFO
vendored
Normal file
10
planet/vendor/pubsubhubbub_publisher/PKG-INFO
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
Metadata-Version: 1.0
|
||||||
|
Name: PubSubHubbub_Publisher
|
||||||
|
Version: 1.0
|
||||||
|
Summary: Publisher client for PubSubHubbub
|
||||||
|
Home-page: http://code.google.com/p/pubsubhubbub/
|
||||||
|
Author: Brett Slatkin
|
||||||
|
Author-email: bslatkin@gmail.com
|
||||||
|
License: Apache 2.0
|
||||||
|
Description: A simple, open, server-to-server web-hook-based pubsub (publish/subscribe) protocol as a simple extension to Atom. Parties (servers) speaking the PubSubHubbub protocol can get near-instant notifications (via webhook callbacks) when a topic (Atom URL) they're interested in is updated.
|
||||||
|
Platform: UNKNOWN
|
2
planet/vendor/pubsubhubbub_publisher/__init__.py
vendored
Normal file
2
planet/vendor/pubsubhubbub_publisher/__init__.py
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
from pubsubhubbub_publish import *
|
||||||
|
|
77
planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
vendored
Normal file
77
planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
vendored
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Copyright 2009 Google Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
"""Simple Publisher client for PubSubHubbub.
|
||||||
|
|
||||||
|
Example usage:
|
||||||
|
|
||||||
|
from pubsubhubbub_publish import *
|
||||||
|
try:
|
||||||
|
publish('http://pubsubhubbub.appspot.com',
|
||||||
|
'http://example.com/feed1/atom.xml',
|
||||||
|
'http://example.com/feed2/atom.xml',
|
||||||
|
'http://example.com/feed3/atom.xml')
|
||||||
|
except PublishError, e:
|
||||||
|
# handle exception...
|
||||||
|
|
||||||
|
Set the 'http_proxy' environment variable on *nix or Windows to use an
|
||||||
|
HTTP proxy.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__author__ = 'bslatkin@gmail.com (Brett Slatkin)'
|
||||||
|
|
||||||
|
import urllib
|
||||||
|
import urllib2
|
||||||
|
|
||||||
|
|
||||||
|
class PublishError(Exception):
    """Raised when a publish request to the hub could not be completed."""
|
||||||
|
|
||||||
|
|
||||||
|
# Maximum number of URLs sent to the hub in one publish request; longer
# URL lists are split into several requests of at most this size.
URL_BATCH_SIZE = 100
|
||||||
|
|
||||||
|
|
||||||
|
def publish(hub, *urls):
    """Publishes an event to a hub.

    Args:
      hub: The hub to publish the event to.
      *urls: One or more URLs to publish to. If only a single URL argument is
        passed and that item is an iterable that is not a string, the contents
        of that iterable will be used to produce the list of published URLs.
        If more than URL_BATCH_SIZE URLs are supplied, this function will
        batch them into chunks across multiple requests.

    Raises:
      PublishError if anything went wrong during publishing.
    """
    import sys  # local import: only used to fetch the active exception

    if len(urls) == 1 and not isinstance(urls[0], basestring):
        urls = list(urls[0])

    for i in xrange(0, len(urls), URL_BATCH_SIZE):
        chunk = urls[i:i+URL_BATCH_SIZE]
        # doseq=True expands the list into one 'hub.url' field per URL.
        data = urllib.urlencode(
            {'hub.url': chunk, 'hub.mode': 'publish'}, doseq=True)
        try:
            response = urllib2.urlopen(hub, data)
        except (IOError, urllib2.HTTPError):
            e = sys.exc_info()[1]
            # A 204 reported as an error is treated as a successful publish
            # of this chunk; move on to the next one.
            if hasattr(e, 'code') and e.code == 204:
                continue
            error = ''
            if hasattr(e, 'read'):
                # Include the response body in the error when one exists.
                error = e.read()
            raise PublishError('%s, Response: "%s"' % (e, error))
        else:
            # The response body is not needed; close it so the connection
            # is released instead of lingering until garbage collection.
            response.close()
|
Loading…
x
Reference in New Issue
Block a user