Add support for creativeCommons and cc namespaces.

2007-03-26 17:42:41 -04:00 · 2007-03-26 17:42:41 -04:00 · 5977be5ea4
commit 5977be5ea4
parent f46efd265e
5 changed files with 97 additions and 8 deletions
--- a/docs/etiquette.html
+++ b/docs/etiquette.html
@ -0,0 +1,48 @@
+<!DOCTYPE html PUBLIC
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<script type="text/javascript" src="docs.js"></script>
+<link rel="stylesheet" type="text/css" href="docs.css"/>
+<title>Etiquette</title>
+</head>
+<body>
+<h2>Etiquette</h2>
+<p>You would think that people who publish syndication feeds do it with the
+intent to be syndicated.  But the truth is that we live in a world where
+<a href="http://en.wikipedia.org/wiki/Deep_linking">deep linking</a> can
+cause people to complain.  Nothing is safe.  But that doesn&#8217;t
+stop us from doing links.</p>
+
+<p>These concerns tend to increase when you profit, either directly via ads or
+indirectly via search engine rankings, from the content of others.</p>
+
+<p>While there are no hard and fast rules that apply here, here are a
+few things you can do to mitigate the concern:</p>
+
+<ul>
+<li>Aggressively use robots.txt, meta tags, and the google/livejournal
+atom namespace to mark your pages as not to be indexed by search
+engines.
+<blockquote><dl>
+<dt><a href="http://www.robotstxt.org/">robots.txt</a>:</dt>
+<dd><p><code>User-agent: *
+Disallow: /</code></p></dd>
+<dt>index.html:</dt>
+<dd><p><code>&lt;<a href="http://www.robotstxt.org/wc/meta-user.html">meta name="robots"</a> content="noindex,nofollow"/&gt;</code></p></dd>
+<dt>atom.xml:</dt>
+<dd><p><code>&lt;feed xmlns:indexing="<a href="http://community.livejournal.com/lj_dev/696793.html">urn:atom-extension:indexing</a>" indexing:index="no"&gt;</code></p>
+<p><code>&lt;access:restriction xmlns:access="<a href="http://www.bloglines.com/about/specs/fac-1.0">http://www.bloglines.com/about/specs/fac-1.0</a>" relationship="deny"/&gt;</code></p></dd>
+</dl></blockquote></li>
+<li><p>Ensure that all <a href="http://nightly.feedparser.org/docs/reference-entry-source.html#reference.entry.source.rights">copyright</a> and <a href="http://nightly.feedparser.org/docs/reference-entry-license.html">licensing</a> information is propagated to the
+combined feed(s) that you produce.</p></li>
+
+<li><p>Add no advertising.  Consider filtering out ads, lest you
+be accused of using someone&#8217;s content to help your friends profit.</p></li>
+
+<li><p>Most importantly, if anyone does object to their content being included,
+quickly and without any complaint, remove them.</p></li>
+</ul>
+</body>
+</html>
--- a/docs/index.html
+++ b/docs/index.html
@ -28,6 +28,7 @@
 <ul>
 <li><a href="migration.html">Migration from Planet 2.0</a></li>
 <li><a href="contributing.html">Contributing</a></li>
+<li><a href="etiquette.html">Etiquette</a></li>
 </ul>
 </li>
 <li>Reference
--- a/planet/feedparser.py
+++ b/planet/feedparser.py
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
 Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
 """

-__version__ = "4.2-pre-" + "$Revision: 1.149 $"[11:16] + "-cvs"
+__version__ = "4.2-pre-" + "$Revision: 1.150 $"[11:16] + "-cvs"
 __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
@ -229,6 +229,10 @@ class FeedParserDict(UserDict):
        if key == 'enclosures':
            norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel'])
            return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure']
+        if key == 'license':
+            for link in UserDict.__getitem__(self, 'links'):
+                if link['rel']=='license' and link.has_key('href'):
+                    return link['href']
        if key == 'categories':
            return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')]
        realkey = self.keymap.get(key, key)
@ -424,7 +428,7 @@ class _FeedParserMixin:
 }
    _matchnamespaces = {}

-    can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
+    can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'icon', 'logo']
    can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
    can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
    html_types = ['text/html', 'application/xhtml+xml']
@ -1247,17 +1251,26 @@ class _FeedParserMixin:
        self._save('expired_parsed', _parse_date(self.pop('expired')))

    def _start_cc_license(self, attrsD):
-        self.push('license', 1)
+        context = self._getContext()
        value = self._getAttribute(attrsD, 'rdf:resource')
-        if value:
-            self.elementstack[-1][2].append(value)
-        self.pop('license')
+        attrsD = FeedParserDict()
+        attrsD['rel']='license'
+        if value: attrsD['href']=value
+        context.setdefault('links', []).append(attrsD)
        
    def _start_creativecommons_license(self, attrsD):
        self.push('license', 1)
+    _start_creativeCommons_license = _start_creativecommons_license

    def _end_creativecommons_license(self):
-        self.pop('license')
+        value = self.pop('license')
+        context = self._getContext()
+        attrsD = FeedParserDict()
+        attrsD['rel']='license'
+        if value: attrsD['href']=value
+        context.setdefault('links', []).append(attrsD)
+        del context['license']
+    _end_creativeCommons_license = _end_creativecommons_license

    def _addXFN(self, relationships, href, name):
        context = self._getContext()
@ -3506,6 +3519,7 @@ class TextSerializer(Serializer):
        
 class PprintSerializer(Serializer):
    def write(self, stream=sys.stdout):
+        if self.results.has_key('href'):
            stream.write(self.results['href'] + '\n\n')
        from pprint import pprint
        pprint(self.results, stream)
--- a/tests/data/reconstitute/cc_license.xml
+++ b/tests/data/reconstitute/cc_license.xml
@ -0,0 +1,13 @@
+<!--
+Description:  creative commons license
+Expect:       links[0].rel == 'license' and links[0].href == 'http://www.creativecommons.org/licenses/by-nc/1.0'
+-->
+
+<rss version="2.0" xmlns:cc="http://web.resource.org/cc/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+  <channel>
+    <item>
+      <cc:license rdf:resource="http://www.creativecommons.org/licenses/by-nc/1.0"/>
+    </item>
+  </channel>
+</rss>
+
--- a/tests/data/reconstitute/creativeCommons_license.xml
+++ b/tests/data/reconstitute/creativeCommons_license.xml
@ -0,0 +1,13 @@
+<!--
+Description:  creative commons license
+Expect:       links[0].rel == 'license' and links[0].href == 'http://www.creativecommons.org/licenses/by-nc/1.0'
+-->
+
+<rss version="2.0" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule">
+  <channel>
+    <item>
+      <creativeCommons:license>http://www.creativecommons.org/licenses/by-nc/1.0</creativeCommons:license>
+    </item>
+  </channel>
+</rss>
+