From b75ba9684e54a086cdb0cc5ea1e89c590b190a72 Mon Sep 17 00:00:00 2001
From: Sam Ruby <rubys@intertwingly.net>
Date: Tue, 19 Sep 2006 20:52:08 -0400
Subject: [PATCH] Support python 2.2 through python 2.5

---
 planet/BeautifulSoup.py                   |  3 +-
 planet/__init__.py                        |  7 ++--
 planet/feedparser.py                      | 44 ++++++++++++++---------
 planet/reconstitute.py                    |  4 +--
 tests/data/reconstitute/source_bozo.xml   |  8 +++++
 tests/data/reconstitute/source_format.xml |  8 +++++
 tests/test_filters.py                     |  7 +++-
 7 files changed, 57 insertions(+), 24 deletions(-)
 create mode 100644 tests/data/reconstitute/source_bozo.xml
 create mode 100644 tests/data/reconstitute/source_format.xml

diff --git a/planet/BeautifulSoup.py b/planet/BeautifulSoup.py
index 1aec4cd..9236930 100644
--- a/planet/BeautifulSoup.py
+++ b/planet/BeautifulSoup.py
@@ -821,7 +821,8 @@ class SoupStrainer:
     def _matches(self, markup, matchAgainst):    
         #print "Matching %s against %s" % (markup, matchAgainst)
         result = False
-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
+        if matchAgainst == True and (not hasattr(types, 'BooleanType') or
+            type(matchAgainst) == types.BooleanType):
             result = markup != None
         elif callable(matchAgainst):
             result = matchAgainst(markup)
diff --git a/planet/__init__.py b/planet/__init__.py
index ceb4b61..baeb991 100644
--- a/planet/__init__.py
+++ b/planet/__init__.py
@@ -67,8 +67,8 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
         options = {}
 
         # add original options
-        for key, value in orig_config.items(list):
-            options[key] = value
+        for key in orig_config.options(list):
+            options[key] = orig_config.get(list, key)
             
         try:
             if use_cache:
@@ -85,8 +85,9 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
             cached_config.set(list, key, value)
 
         # read list
+        curdir=getattr(os.path, 'curdir', '.')
         if sys.platform.find('win') < 0:
-            base = urljoin('file:', os.path.abspath(os.path.curdir))
+            base = urljoin('file:', os.path.abspath(curdir))
         else:
             path = os.path.abspath(os.path.curdir)
             base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
diff --git a/planet/feedparser.py b/planet/feedparser.py
index 191e374..b261759 100755
--- a/planet/feedparser.py
+++ b/planet/feedparser.py
@@ -130,6 +130,18 @@ try:
 except:
     chardet = None
 
+# reversible htmlentitydefs mappings for Python 2.2
+try:
+  from htmlentitydefs import name2codepoint, codepoint2name
+except:
+  import htmlentitydefs
+  name2codepoint={}
+  codepoint2name={}
+  for (name,codepoint) in htmlentitydefs.entitydefs.iteritems():
+    if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
+    name2codepoint[name]=ord(codepoint)
+    codepoint2name[ord(codepoint)]=name
+
 # BeautifulSoup parser used for parsing microformats from embedded HTML content
 # http://www.crummy.com/software/BeautifulSoup/.  At the moment, it appears
 # that there is a version incompatibility, so the import is replaced with
@@ -574,20 +586,9 @@ class _FeedParserMixin:
             if text.startswith('&#') and text.endswith(';'):
                 return self.handle_entityref(text)
         else:
-            # entity resolution graciously donated by Aaron Swartz
-            def name2cp(k):
-                import htmlentitydefs
-                if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
-                    return htmlentitydefs.name2codepoint[k]
-                k = htmlentitydefs.entitydefs[k]
-                if k.startswith('&#x') and k.endswith(';'):
-                    return int(k[3:-1],16) # not in latin-1
-                if k.startswith('&#') and k.endswith(';'):
-                    return int(k[2:-1]) # not in latin-1
-                return ord(k)
-            try: name2cp(ref)
+            try: name2codepoint[ref]
             except KeyError: text = '&%s;' % ref
-            else: text = unichr(name2cp(ref)).encode('utf-8')
+            else: text = unichr(name2codepoint[ref]).encode('utf-8')
         self.elementstack[-1][2].append(text)
 
     def handle_data(self, text, escape=1):
@@ -672,9 +673,9 @@ class _FeedParserMixin:
             # only if all the remaining content is nested underneath it.
             # This means that the divs would be retained in the following:
             #    <div>foo</div><div>bar</div>
-            if pieces and len(pieces)>1 and not pieces[-1].strip():
+            while pieces and len(pieces)>1 and not pieces[-1].strip():
                 del pieces[-1]
-            if pieces and len(pieces)>1 and not pieces[0].strip():
+            while pieces and len(pieces)>1 and not pieces[0].strip():
                 del pieces[0]
             if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>':
                 depth = 0
@@ -1521,6 +1522,11 @@ if _XML_AVAILABLE:
 
             if prefix:
                 localname = prefix.lower() + ':' + localname
+            elif namespace and not qname: #Expat
+                for name,value in self.namespacesInUse.items():
+                     if name and value == namespace:
+                         localname = name + ':' + localname
+                         break
             if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
 
             for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
@@ -1546,6 +1552,11 @@ if _XML_AVAILABLE:
             prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
             if prefix:
                 localname = prefix + ':' + localname
+            elif namespace and not qname: #Expat
+                for name,value in self.namespacesInUse.items():
+                     if name and value == namespace:
+                         localname = name + ':' + localname
+                         break
             localname = str(localname).lower()
             self.unknown_endtag(localname)
 
@@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
     def handle_entityref(self, ref):
         # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
         # Reconstruct the original entity reference.
-        import htmlentitydefs
-        if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref):
+        if name2codepoint.has_key(ref):
             self.pieces.append('&%(ref)s;' % locals())
         else:
             self.pieces.append('&amp;%(ref)s' % locals())
diff --git a/planet/reconstitute.py b/planet/reconstitute.py
index 28f13c1..2badc50 100644
--- a/planet/reconstitute.py
+++ b/planet/reconstitute.py
@@ -193,8 +193,8 @@ def source(xsource, source, bozo, format):
         if key.startswith('planet_'):
             createTextElement(xsource, key.replace('_',':',1), value)
 
-    createTextElement(xsource, 'planet_bozo', bozo and 'true' or 'false')
-    createTextElement(xsource, 'planet_format', format)
+    createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false')
+    createTextElement(xsource, 'planet:format', format)
 
 def reconstitute(feed, entry):
     """ create an entry document from a parsed feed """
diff --git a/tests/data/reconstitute/source_bozo.xml b/tests/data/reconstitute/source_bozo.xml
new file mode 100644
index 0000000..38a6317
--- /dev/null
+++ b/tests/data/reconstitute/source_bozo.xml
@@ -0,0 +1,8 @@
+<!--
+Description:  bozo
+Expect:       source.planet_bozo == 'false'
+-->
+
+<feed xmlns="http://www.w3.org/2005/Atom">
+  <entry/>
+</feed>
diff --git a/tests/data/reconstitute/source_format.xml b/tests/data/reconstitute/source_format.xml
new file mode 100644
index 0000000..0e41171
--- /dev/null
+++ b/tests/data/reconstitute/source_format.xml
@@ -0,0 +1,8 @@
+<!--
+Description:  format
+Expect:       source.planet_format == 'atom10'
+-->
+
+<feed xmlns="http://www.w3.org/2005/Atom">
+  <entry/>
+</feed>
diff --git a/tests/test_filters.py b/tests/test_filters.py
index aeee9a4..296e39f 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -81,6 +81,11 @@ try:
     sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
     sed.communicate()
     if sed.returncode != 0: raise Exception
-except:
+except Exception, expr:
     # sed is not available
     del FilterTests.test_stripAd_yahoo
+
+    if isinstance(expr, ImportError):
+        # Popen is not available
+        for method in dir(FilterTests):
+            if method.startswith('test_'):  delattr(FilterTests,method)