From b75ba9684e54a086cdb0cc5ea1e89c590b190a72 Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Tue, 19 Sep 2006 20:52:08 -0400 Subject: [PATCH] Support python 2.2 through python 2.5 --- planet/BeautifulSoup.py | 3 +- planet/__init__.py | 7 ++-- planet/feedparser.py | 44 ++++++++++++++--------- planet/reconstitute.py | 4 +-- tests/data/reconstitute/source_bozo.xml | 8 +++++ tests/data/reconstitute/source_format.xml | 8 +++++ tests/test_filters.py | 7 +++- 7 files changed, 57 insertions(+), 24 deletions(-) create mode 100644 tests/data/reconstitute/source_bozo.xml create mode 100644 tests/data/reconstitute/source_format.xml diff --git a/planet/BeautifulSoup.py b/planet/BeautifulSoup.py index 1aec4cd..9236930 100644 --- a/planet/BeautifulSoup.py +++ b/planet/BeautifulSoup.py @@ -821,7 +821,8 @@ class SoupStrainer: def _matches(self, markup, matchAgainst): #print "Matching %s against %s" % (markup, matchAgainst) result = False - if matchAgainst == True and type(matchAgainst) == types.BooleanType: + if matchAgainst == True and (not hasattr(types, 'BooleanType') or + type(matchAgainst) == types.BooleanType): result = markup != None elif callable(matchAgainst): result = matchAgainst(markup) diff --git a/planet/__init__.py b/planet/__init__.py index ceb4b61..baeb991 100644 --- a/planet/__init__.py +++ b/planet/__init__.py @@ -67,8 +67,8 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru options = {} # add original options - for key, value in orig_config.items(list): - options[key] = value + for key in orig_config.options(list): + options[key] = orig_config.get(list, key) try: if use_cache: @@ -85,8 +85,9 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru cached_config.set(list, key, value) # read list + curdir=getattr(os.path, 'curdir', '.') if sys.platform.find('win') < 0: - base = urljoin('file:', os.path.abspath(os.path.curdir)) + base = urljoin('file:', os.path.abspath(curdir)) else: path = os.path.abspath(os.path.curdir) base = urljoin('file:///', path.replace(':','|').replace('\\','/')) diff --git a/planet/feedparser.py b/planet/feedparser.py index 191e374..b261759 100755 --- a/planet/feedparser.py +++ b/planet/feedparser.py @@ -130,6 +130,18 @@ try: except: chardet = None +# reversable htmlentitydefs mappings for Python 2.2 +try: + from htmlentitydefs import name2codepoint, codepoint2name +except: + import htmlentitydefs + name2codepoint={} + codepoint2name={} + for (name,codepoint) in htmlentitydefs.entitydefs.iteritems(): + if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) + name2codepoint[name]=ord(codepoint) + codepoint2name[ord(codepoint)]=name + # BeautifulSoup parser used for parsing microformats from embedded HTML content # http://www.crummy.com/software/BeautifulSoup/. At the moment, it appears # that there is a version incompatibility, so the import is replaced with @@ -574,20 +586,9 @@ class _FeedParserMixin: if text.startswith('&#') and text.endswith(';'): return self.handle_entityref(text) else: - # entity resolution graciously donated by Aaron Swartz - def name2cp(k): - import htmlentitydefs - if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3 - return htmlentitydefs.name2codepoint[k] - k = htmlentitydefs.entitydefs[k] - if k.startswith('&#x') and k.endswith(';'): - return int(k[3:-1],16) # not in latin-1 - if k.startswith('&#') and k.endswith(';'): - return int(k[2:-1]) # not in latin-1 - return ord(k) - try: name2cp(ref) + try: name2codepoint[ref] except KeyError: text = '&%s;' % ref - else: text = unichr(name2cp(ref)).encode('utf-8') + else: text = unichr(name2codepoint[ref]).encode('utf-8') self.elementstack[-1][2].append(text) def handle_data(self, text, escape=1): @@ -672,9 +673,9 @@ class _FeedParserMixin: # only if all the remaining content is nested underneath it. # This means that the divs would be retained in the following: #
foo
bar
- if pieces and len(pieces)>1 and not pieces[-1].strip(): + while pieces and len(pieces)>1 and not pieces[-1].strip(): del pieces[-1] - if pieces and len(pieces)>1 and not pieces[0].strip(): + while pieces and len(pieces)>1 and not pieces[0].strip(): del pieces[0] if pieces and (pieces[0] == '
' or pieces[0].startswith('
': depth = 0 @@ -1521,6 +1522,11 @@ if _XML_AVAILABLE: if prefix: localname = prefix.lower() + ':' + localname + elif namespace and not qname: #Expat + for name,value in self.namespacesInUse.items(): + if name and value == namespace: + localname = name + ':' + localname + break if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): @@ -1546,6 +1552,11 @@ if _XML_AVAILABLE: prefix = self._matchnamespaces.get(lowernamespace, givenprefix) if prefix: localname = prefix + ':' + localname + elif namespace and not qname: #Expat + for name,value in self.namespacesInUse.items(): + if name and value == namespace: + localname = name + ':' + localname + break localname = str(localname).lower() self.unknown_endtag(localname) @@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser): def handle_entityref(self, ref): # called for each entity reference, e.g. for '©', ref will be 'copy' # Reconstruct the original entity reference. - import htmlentitydefs - if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref): + if name2codepoint.has_key(ref): self.pieces.append('&%(ref)s;' % locals()) else: self.pieces.append('&%(ref)s' % locals()) diff --git a/planet/reconstitute.py b/planet/reconstitute.py index 28f13c1..2badc50 100644 --- a/planet/reconstitute.py +++ b/planet/reconstitute.py @@ -193,8 +193,8 @@ def source(xsource, source, bozo, format): if key.startswith('planet_'): createTextElement(xsource, key.replace('_',':',1), value) - createTextElement(xsource, 'planet_bozo', bozo and 'true' or 'false') - createTextElement(xsource, 'planet_format', format) + createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false') + createTextElement(xsource, 'planet:format', format) def reconstitute(feed, entry): """ create an entry document from a parsed feed """ diff --git a/tests/data/reconstitute/source_bozo.xml b/tests/data/reconstitute/source_bozo.xml new file mode 100644 index 0000000..38a6317 --- /dev/null +++ b/tests/data/reconstitute/source_bozo.xml @@ -0,0 +1,8 @@ + + + + + diff --git a/tests/data/reconstitute/source_format.xml b/tests/data/reconstitute/source_format.xml new file mode 100644 index 0000000..0e41171 --- /dev/null +++ b/tests/data/reconstitute/source_format.xml @@ -0,0 +1,8 @@ + + + + + diff --git a/tests/test_filters.py b/tests/test_filters.py index aeee9a4..296e39f 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -81,6 +81,11 @@ try: sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE) sed.communicate() if sed.returncode != 0: raise Exception -except: +except Exception, expr: # sed is not available del FilterTests.test_stripAd_yahoo + + if isinstance(expr, ImportError): + # Popen is not available + for method in dir(FilterTests): + if method.startswith('test_'): delattr(FilterTests,method)