[ted] Improve playlist extraction (closes #21032)

This commit is contained in:
Sergey M․ 2019-06-08 03:06:41 +07:00
parent c2ee6fa66a
commit dbb1886114
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -157,16 +157,19 @@ class TEDIE(InfoExtractor):
'Downloading playlist webpage') 'Downloading playlist webpage')
playlist_entries = [] playlist_entries = []
for entry in re.findall(r'(?s)<[^>]+data-ga-context="playlist"[^>]*>', webpage): for entry in re.findall(r'(?s)<[^>]+data-ga-context=["\']playlist["\'][^>]*>', webpage):
attrs = extract_attributes(entry) attrs = extract_attributes(entry)
entry_url = compat_urlparse.urljoin(url, attrs['href']) entry_url = compat_urlparse.urljoin(url, attrs['href'])
playlist_entries.append(self.url_result(entry_url, self.ie_key())) playlist_entries.append(self.url_result(entry_url, self.ie_key()))
final_url = self._og_search_url(webpage) final_url = self._og_search_url(webpage, fatal=False)
playlist_id = (
re.match(self._VALID_URL, final_url).group('playlist_id')
if final_url else None)
return self.playlist_result( return self.playlist_result(
playlist_entries, playlist_entries, playlist_id=playlist_id,
playlist_id=re.match(self._VALID_URL, final_url, re.VERBOSE).group('playlist_id'), playlist_title=self._og_search_title(webpage, fatal=False),
playlist_title=self._og_search_title(webpage),
playlist_description=self._og_search_description(webpage)) playlist_description=self._og_search_description(webpage))
def _talk_info(self, url, video_name): def _talk_info(self, url, video_name):