Improved parsing of RSS 2.0 authors
This commit is contained in:
parent
01e47cb49a
commit
4b7891110f
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
|
||||
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
||||
"""
|
||||
|
||||
__version__ = "4.2-pre-" + "$Revision: 1.134 $"[11:16] + "-cvs"
|
||||
__version__ = "4.2-pre-" + "$Revision: 1.135 $"[11:16] + "-cvs"
|
||||
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
@@ -1112,12 +1112,14 @@ class _FeedParserMixin:
|
||||
else:
|
||||
author, email = context.get(key), None
|
||||
if not author: return
|
||||
emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author)
|
||||
emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author)
|
||||
if emailmatch:
|
||||
email = emailmatch.group(0)
|
||||
# probably a better way to do the following, but it passes all the tests
|
||||
author = author.replace(email, '')
|
||||
author = author.replace('()', '')
|
||||
author = author.replace('<>', '')
|
||||
author = author.replace('&lt;&gt;', '')
|
||||
author = author.strip()
|
||||
if author and (author[0] == '('):
|
||||
author = author[1:]
|
||||
|
Loading…
x
Reference in New Issue
Block a user