Sam Ruby 2006-09-13 Add stop-opacity; resynch with feedparser

This commit is contained in:
Jacques Distler 2006-09-14 22:23:30 -05:00
commit a0f4a4cace

View File

@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
"""
__version__ = "4.2-pre-" + "$Revision: 1.135 $"[11:16] + "-cvs"
__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@ -569,7 +569,7 @@ class _FeedParserMixin:
if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref)
if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
text = '&%s;' % ref
elif ref in self.entities:
elif ref in self.entities.keys():
text = self.entities[ref]
if text.startswith('&#') and text.endswith(';'):
return self.handle_entityref(text)
@ -765,7 +765,7 @@ class _FeedParserMixin:
# map win-1252 extensions to the proper code points
if type(output) == type(u''):
output = u''.join([c in _cp1252 and _cp1252[c] or c for c in output])
output = u''.join([c in _cp1252.keys() and _cp1252[c] or c for c in output])
# categories/tags/keywords/whatever are handled in _end_category
if element == 'category':
@ -839,7 +839,7 @@ class _FeedParserMixin:
if filter(lambda e: e not in entitydefs.keys(),
re.findall(r'&(\w+);',str)): return
return True
return 1
def _mapToStandardPrefix(self, name):
colonpos = name.find(':')
@ -1649,7 +1649,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
else:
value = unichr(int(ref))
if value in _cp1252:
if value in _cp1252.keys():
self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:])
else:
self.pieces.append('&#%(ref)s;' % locals())
@ -2284,7 +2284,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
'preserveAspectRatio', 'r', 'repeatCount', 'repeatDur',
'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx',
'ry', 'slope', 'stemh', 'stemv', 'stop-color',
'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
'strikethrough-position', 'strikethrough-thickness', 'stroke',
'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
'stroke-linejoin', 'stroke-miterlimit', 'stroke-width',
@ -2514,10 +2514,12 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
If the etag argument is supplied, it will be used as the value of an
If-None-Match request header.
If the modified argument is supplied, it must be a tuple of 9 integers
as returned by gmtime() in the standard Python time module. This MUST
be in GMT (Greenwich Mean Time). The formatted date/time will be used
as the value of an If-Modified-Since request header.
If the modified argument is supplied, it can be a tuple of 9 integers
(as returned by gmtime() in the standard Python time module) or a date
string in any format supported by feedparser. Regardless, it MUST
be in GMT (Greenwich Mean Time). It will be reformatted into an
RFC 1123-compliant date and used as the value of an If-Modified-Since
request header.
If the agent argument is supplied, it will be used as the value of a
User-Agent request header.
@ -2563,6 +2565,8 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
request.add_header('User-Agent', agent)
if etag:
request.add_header('If-None-Match', etag)
if type(modified) == type(''):
modified = _parse_date(modified)
if modified:
# format into an RFC 1123-compliant timestamp. We can't use
# time.strftime() since the %a and %b directives can be affected
@ -3414,20 +3418,86 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
result['namespaces'] = feedparser.namespacesInUse
return result
class Serializer:
def __init__(self, results):
self.results = results
class TextSerializer(Serializer):
def write(self, stream=sys.stdout):
self._writer(stream, self.results, '')
def _writer(self, stream, node, prefix):
if not node: return
if hasattr(node, 'keys'):
keys = node.keys()
keys.sort()
for k in keys:
if k in ('description', 'link'): continue
if node.has_key(k + '_detail'): continue
if node.has_key(k + '_parsed'): continue
self._writer(stream, node[k], prefix + k + '.')
elif type(node) == types.ListType:
index = 0
for n in node:
self._writer(stream, n, prefix[:-1] + '[' + str(index) + '].')
index += 1
else:
try:
s = str(node).encode('utf-8')
s = s.replace('\\', '\\\\')
s = s.replace('\r', '')
s = s.replace('\n', r'\n')
stream.write(prefix[:-1])
stream.write('=')
stream.write(s)
stream.write('\n')
except:
pass
class PprintSerializer(Serializer):
def write(self, stream=sys.stdout):
stream.write(self.results['href'] + '\n\n')
from pprint import pprint
pprint(self.results, stream)
stream.write('\n')
if __name__ == '__main__':
if not sys.argv[1:]:
print __doc__
sys.exit(0)
try:
from optparse import OptionParser
except:
OptionParser = None
if OptionParser:
optionParser = OptionParser(version=__version__, usage="%prog [options] url_or_filename_or_-")
optionParser.set_defaults(format="pprint")
optionParser.add_option("-A", "--user-agent", dest="agent", metavar="AGENT", help="User-Agent for HTTP URLs")
optionParser.add_option("-e", "--referer", "--referrer", dest="referrer", metavar="URL", help="Referrer for HTTP URLs")
optionParser.add_option("-t", "--etag", dest="etag", metavar="TAG", help="ETag/If-None-Match for HTTP URLs")
optionParser.add_option("-m", "--last-modified", dest="modified", metavar="DATE", help="Last-modified/If-Modified-Since for HTTP URLs (any supported date format)")
optionParser.add_option("-f", "--format", dest="format", metavar="FORMAT", help="output results in FORMAT (text, pprint)")
optionParser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="write debugging information to stderr")
(options, urls) = optionParser.parse_args()
if options.verbose:
_debug = 1
if not urls:
optionParser.print_help()
sys.exit(0)
else:
if not sys.argv[1:]:
print __doc__
sys.exit(0)
class _Options:
etag = modified = agent = referrer = None
format = 'pprint'
options = _Options()
urls = sys.argv[1:]
zopeCompatibilityHack()
from pprint import pprint
serializer = globals().get(options.format.capitalize() + 'Serializer', Serializer)
for url in urls:
print url
print
result = parse(url)
pprint(result)
print
results = parse(url, etag=options.etag, modified=options.modified, agent=options.agent, referrer=options.referrer)
serializer(results).write(sys.stdout)
#REVISION HISTORY
#1.0 - 9/27/2002 - MAP - fixed namespace processing on prefixed RSS 2.0 elements,