[extractor/common] Allow multiline content tags

This commit is contained in:
Sergey M․ 2015-01-01 00:37:14 +06:00
parent b7a7319c38
commit 6c6f1408f2

View File

@@ -589,7 +589,7 @@ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs
if display_name is None: if display_name is None:
display_name = name display_name = name
return self._html_search_regex( return self._html_search_regex(
r'''(?ix)<meta r'''(?isx)<meta
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
[^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name), [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
html, display_name, fatal=fatal, group='content', **kwargs) html, display_name, fatal=fatal, group='content', **kwargs)