Add support for creativeCommons and cc namespaces.

This commit is contained in:
Sam Ruby 2007-03-26 17:42:41 -04:00
parent f46efd265e
commit 5977be5ea4
5 changed files with 97 additions and 8 deletions

48
docs/etiquette.html Normal file
View File

@ -0,0 +1,48 @@
<!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<script type="text/javascript" src="docs.js"></script>
<link rel="stylesheet" type="text/css" href="docs.css"/>
<title>Etiquette</title>
</head>
<body>
<h2>Etiquette</h2>
<p>You would think that people who publish syndication feeds do it with the
intent to be syndicated. But the truth is that we live in a world where
<a href="http://en.wikipedia.org/wiki/Deep_linking">deep linking</a> can
cause people to complain. Nothing is safe. But that doesn&#8217;t
stop us from doing links.</p>
<p>These concerns tend to increase when you profit, either directly via ads or
indirectly via search engine rankings, from the content of others.</p>
<p>While there are no hard and fast rules that apply here, here&#8217;s are a
few things you can do to mitigate the concern:</p>
<ul>
<li>Aggressively use robots.txt, meta tags, and the google/livejournal
atom namespace to mark your pages as not to be indexed by search
engines.</li>
<blockquote><p><dl>
<dt><a href="http://www.robotstxt.org/">robots.txt</a>:</dt>
<dd><p><code>User-agent: *
Disallow: /</code></p></dd>
<dt>index.html:</dt>
<dd><p><code>&lt;<a href="http://www.robotstxt.org/wc/meta-user.html">meta name="robots"</a> content="noindex,nofollow"/&gt;</code></p></dd>
<dt>atom.xml:</dt>
<dd><p><code>&lt;feed xmlns:indexing="<a href="http://community.livejournal.com/lj_dev/696793.html">urn:atom-extension:indexing</a>" indexing:index="no"&gt;</code></p>
<p><code>&lt;access:restriction xmlns:access="<a href="http://www.bloglines.com/about/specs/fac-1.0">http://www.bloglines.com/about/specs/fac-1.0</a>" relationship="deny"/&gt;</code></p></dd>
</dl></p></blockquote>
<li><p>Ensure that all <a href="http://nightly.feedparser.org/docs/reference-entry-source.html#reference.entry.source.rights">copyright</a> and <a href="http://nightly.feedparser.org/docs/reference-entry-license.html">licensing</a> information is propagated to the
combined feed(s) that you produce.</p></li>
<li><p>Add no advertising. Consider filtering out ads, lest you
be accused of using someone&#8217;s content to help your friends profit.</p></li>
<li><p>Most importantly, if anyone does object to their content being included,
quickly and without any complaint, remove them.</p></li>
</ul>
</body>
</html>

View File

@ -28,6 +28,7 @@
<ul>
<li><a href="migration.html">Migration from Planet 2.0</a></li>
<li><a href="contributing.html">Contributing</a></li>
<li><a href="etiquette.html">Etiquette</a></li>
</ul>
</li>
<li>Reference

View File

@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
"""
__version__ = "4.2-pre-" + "$Revision: 1.149 $"[11:16] + "-cvs"
__version__ = "4.2-pre-" + "$Revision: 1.150 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@ -229,6 +229,10 @@ class FeedParserDict(UserDict):
if key == 'enclosures':
norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel'])
return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure']
if key == 'license':
for link in UserDict.__getitem__(self, 'links'):
if link['rel']=='license' and link.has_key('href'):
return link['href']
if key == 'categories':
return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')]
realkey = self.keymap.get(key, key)
@ -424,7 +428,7 @@ class _FeedParserMixin:
}
_matchnamespaces = {}
can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'icon', 'logo']
can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
html_types = ['text/html', 'application/xhtml+xml']
@ -1247,17 +1251,26 @@ class _FeedParserMixin:
self._save('expired_parsed', _parse_date(self.pop('expired')))
def _start_cc_license(self, attrsD):
self.push('license', 1)
context = self._getContext()
value = self._getAttribute(attrsD, 'rdf:resource')
if value:
self.elementstack[-1][2].append(value)
self.pop('license')
attrsD = FeedParserDict()
attrsD['rel']='license'
if value: attrsD['href']=value
context.setdefault('links', []).append(attrsD)
def _start_creativecommons_license(self, attrsD):
self.push('license', 1)
_start_creativeCommons_license = _start_creativecommons_license
def _end_creativecommons_license(self):
self.pop('license')
value = self.pop('license')
context = self._getContext()
attrsD = FeedParserDict()
attrsD['rel']='license'
if value: attrsD['href']=value
context.setdefault('links', []).append(attrsD)
del context['license']
_end_creativeCommons_license = _end_creativecommons_license
def _addXFN(self, relationships, href, name):
context = self._getContext()
@ -3506,6 +3519,7 @@ class TextSerializer(Serializer):
class PprintSerializer(Serializer):
def write(self, stream=sys.stdout):
if self.results.has_key('href'):
stream.write(self.results['href'] + '\n\n')
from pprint import pprint
pprint(self.results, stream)

View File

@ -0,0 +1,13 @@
<!--
Description: creative commons license
Expect: links[0].rel == 'license' and links[0].href == 'http://www.creativecommons.org/licenses/by-nc/1.0'
-->
<rss version="2.0" xmlns:cc="http://web.resource.org/cc/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<channel>
<item>
<cc:license rdf:resource="http://www.creativecommons.org/licenses/by-nc/1.0"/>
</item>
</channel>
</rss>

View File

@ -0,0 +1,13 @@
<!--
Description: creative commons license
Expect: links[0].rel == 'license' and links[0].href == 'http://www.creativecommons.org/licenses/by-nc/1.0'
-->
<rss version="2.0" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule">
<channel>
<item>
<creativeCommons:license>http://www.creativecommons.org/licenses/by-nc/1.0</creativeCommons:license>
</item>
</channel>
</rss>