From 4b7891110f48b2b2d0dfa2516ddfc78682a94f19 Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Thu, 31 Aug 2006 20:20:24 -0400 Subject: [PATCH] Improved parsing of RSS 2.0 authors --- planet/feedparser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/planet/feedparser.py b/planet/feedparser.py index 1845897..1a6ae15 100755 --- a/planet/feedparser.py +++ b/planet/feedparser.py @@ -11,7 +11,7 @@ Recommended: Python 2.3 or later Recommended: CJKCodecs and iconv_codec """ -__version__ = "4.2-pre-" + "$Revision: 1.134 $"[11:16] + "-cvs" +__version__ = "4.2-pre-" + "$Revision: 1.135 $"[11:16] + "-cvs" __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -1112,12 +1112,14 @@ class _FeedParserMixin: else: author, email = context.get(key), None if not author: return - emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author) + emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author) if emailmatch: email = emailmatch.group(0) # probably a better way to do the following, but it passes all the tests author = author.replace(email, '') author = author.replace('()', '') + author = author.replace('<>', '') + author = author.replace('&lt;&gt;', '') author = author.strip() if author and (author[0] == '('): author = author[1:]