Only look for DOCTYPE and ENTITY declarations at the beginning of the document
http://xn--8ws00zhy3a.com/blog/2007/10/obfuscated-atom
This commit is contained in:
parent
d90070f0de
commit
acad3937f8
18
planet/vendor/feedparser.py
vendored
18
planet/vendor/feedparser.py
vendored
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
|
||||
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
||||
"""
|
||||
|
||||
__version__ = "4.2-pre-" + "$Revision: 265 $"[11:14] + "-svn"
|
||||
__version__ = "4.2-pre-" + "$Revision: 266 $"[11:14] + "-svn"
|
||||
__license__ = """Copyright (c) 2002-2007, Mark Pilgrim, All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
@@ -3297,11 +3297,15 @@ def _stripDoctype(data):
|
||||
rss_version may be 'rss091n' or None
|
||||
stripped_data is the same XML document, minus the DOCTYPE
|
||||
'''
|
||||
entity_pattern = re.compile(r'<!ENTITY([^>]*?)>', re.MULTILINE)
|
||||
entity_results=entity_pattern.findall(data)
|
||||
data = entity_pattern.sub('', data)
|
||||
doctype_pattern = re.compile(r'<!DOCTYPE([^>]*?)>', re.MULTILINE)
|
||||
doctype_results = doctype_pattern.findall(data)
|
||||
start = re.search('<\w',data)
|
||||
start = start and start.start() or -1
|
||||
head,data = data[:start+1], data[start+1:]
|
||||
|
||||
entity_pattern = re.compile(r'^\s*<!ENTITY([^>]*?)>', re.MULTILINE)
|
||||
entity_results=entity_pattern.findall(head)
|
||||
head = entity_pattern.sub('', head)
|
||||
doctype_pattern = re.compile(r'^\s*<!DOCTYPE([^>]*?)>', re.MULTILINE)
|
||||
doctype_results = doctype_pattern.findall(head)
|
||||
doctype = doctype_results and doctype_results[0] or ''
|
||||
if doctype.lower().count('netscape'):
|
||||
version = 'rss091n'
|
||||
@@ -3315,7 +3319,7 @@ def _stripDoctype(data):
|
||||
safe_entities=filter(lambda e: safe_pattern.match(e),entity_results)
|
||||
if safe_entities:
|
||||
replacement='<!DOCTYPE feed [\n <!ENTITY %s>\n]>' % '>\n <!ENTITY '.join(safe_entities)
|
||||
data = doctype_pattern.sub(replacement, data)
|
||||
data = doctype_pattern.sub(replacement, head) + data
|
||||
|
||||
return version, data, dict(replacement and safe_pattern.findall(replacement))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user