From acad3937f8101809b259baf3ff6a1d1910209c37 Mon Sep 17 00:00:00 2001
From: Sam Ruby <rubys@intertwingly.net>
Date: Wed, 24 Oct 2007 11:17:59 -0400
Subject: [PATCH] Only look for DOCTYPE and ENTITY declarations at the
 beginning of the doc http://xn--8ws00zhy3a.com/blog/2007/10/obfuscated-atom

---
 planet/vendor/feedparser.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/planet/vendor/feedparser.py b/planet/vendor/feedparser.py
index ec46598..9961c34 100755
--- a/planet/vendor/feedparser.py
+++ b/planet/vendor/feedparser.py
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
 Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
 """
 
-__version__ = "4.2-pre-" + "$Revision: 265 $"[11:14] + "-svn"
+__version__ = "4.2-pre-" + "$Revision: 266 $"[11:14] + "-svn"
 __license__ = """Copyright (c) 2002-2007, Mark Pilgrim, All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification,
@@ -3297,11 +3297,15 @@ def _stripDoctype(data):
     rss_version may be 'rss091n' or None
     stripped_data is the same XML document, minus the DOCTYPE
     '''
-    entity_pattern = re.compile(r'<!ENTITY([^>]*?)>', re.MULTILINE)
-    entity_results=entity_pattern.findall(data)
-    data = entity_pattern.sub('', data)
-    doctype_pattern = re.compile(r'<!DOCTYPE([^>]*?)>', re.MULTILINE)
-    doctype_results = doctype_pattern.findall(data)
+    start = re.search('<\w',data)
+    start = start and start.start() or -1
+    head,data = data[:start+1], data[start+1:]
+    
+    entity_pattern = re.compile(r'^\s*<!ENTITY([^>]*?)>', re.MULTILINE)
+    entity_results=entity_pattern.findall(head)
+    head = entity_pattern.sub('', head)
+    doctype_pattern = re.compile(r'^\s*<!DOCTYPE([^>]*?)>', re.MULTILINE)
+    doctype_results = doctype_pattern.findall(head)
     doctype = doctype_results and doctype_results[0] or ''
     if doctype.lower().count('netscape'):
         version = 'rss091n'
@@ -3315,7 +3319,7 @@ def _stripDoctype(data):
        safe_entities=filter(lambda e: safe_pattern.match(e),entity_results)
        if safe_entities:
            replacement='<!DOCTYPE feed [\n  <!ENTITY %s>\n]>' % '>\n  <!ENTITY '.join(safe_entities)
-    data = doctype_pattern.sub(replacement, data)
+    data = doctype_pattern.sub(replacement, head) + data
 
     return version, data, dict(replacement and safe_pattern.findall(replacement))