[youtube:tab] Add support for hashtag videos extraction (closes #28308)

This commit is contained in:
Remita Amine 2021-04-01 11:50:30 +01:00
parent ca304beb15
commit 3748863070

View File

@ -1959,7 +1959,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
invidio\.us invidio\.us
)/ )/
(?: (?:
(?:channel|c|user|feed)/| (?:channel|c|user|feed|hashtag)/|
(?:playlist|watch)\?.*?\blist=| (?:playlist|watch)\?.*?\blist=|
(?!(?:watch|embed|v|e)\b) (?!(?:watch|embed|v|e)\b)
) )
@ -2245,6 +2245,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/TheYoungTurks/live', 'url': 'https://www.youtube.com/TheYoungTurks/live',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.youtube.com/hashtag/cctv9',
'info_dict': {
'id': 'cctv9',
'title': '#cctv9',
},
'playlist_mincount': 350,
}] }]
@classmethod @classmethod
@ -2392,6 +2399,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
for entry in self._post_thread_entries(renderer): for entry in self._post_thread_entries(renderer):
yield entry yield entry
def _rich_grid_entries(self, contents):
    """Yield video entries found in a rich grid's ``contents`` items.

    For each item, the dict at
    ``richItemRenderer -> content -> videoRenderer`` is looked up and
    passed to ``self._video_entry``. Items without such a dict are
    skipped, as are videos for which ``_video_entry`` returns a falsy
    value.
    """
    for item in contents:
        renderer = try_get(
            item,
            lambda x: x['richItemRenderer']['content']['videoRenderer'],
            dict)
        if not renderer:
            # Not a video item (or malformed); nothing to extract.
            continue
        video = self._video_entry(renderer)
        if video:
            yield video
@staticmethod @staticmethod
def _build_continuation_query(continuation, ctp=None): def _build_continuation_query(continuation, ctp=None):
query = { query = {
@ -2442,8 +2457,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not tab_content: if not tab_content:
return return
slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict) slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
if not slr_renderer: if slr_renderer:
return
is_channels_tab = tab.get('title') == 'Channels' is_channels_tab = tab.get('title') == 'Channels'
continuation = None continuation = None
slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or [] slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
@ -2488,9 +2502,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not continuation: if not continuation:
continuation = self._extract_continuation(is_renderer) continuation = self._extract_continuation(is_renderer)
if not continuation: if not continuation:
continuation = self._extract_continuation(slr_renderer) continuation = self._extract_continuation(slr_renderer)
else:
rich_grid_renderer = tab_content.get('richGridRenderer')
if not rich_grid_renderer:
return
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
yield entry
continuation = self._extract_continuation(rich_grid_renderer)
headers = { headers = {
'x-youtube-client-name': '1', 'x-youtube-client-name': '1',
@ -2586,6 +2606,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
yield entry yield entry
continuation = self._extract_continuation(continuation_renderer) continuation = self._extract_continuation(continuation_renderer)
continue continue
renderer = continuation_item.get('richItemRenderer')
if renderer:
for entry in self._rich_grid_entries(continuation_items):
yield entry
continuation = self._extract_continuation({'contents': continuation_items})
continue
break break
@ -2642,7 +2668,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
selected_tab = self._extract_selected_tab(tabs) selected_tab = self._extract_selected_tab(tabs)
renderer = try_get( renderer = try_get(
data, lambda x: x['metadata']['channelMetadataRenderer'], dict) data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
playlist_id = title = description = None playlist_id = item_id
title = description = None
if renderer: if renderer:
channel_title = renderer.get('title') or item_id channel_title = renderer.get('title') or item_id
tab_title = selected_tab.get('title') tab_title = selected_tab.get('title')
@ -2651,12 +2678,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
title += ' - %s' % tab_title title += ' - %s' % tab_title
description = renderer.get('description') description = renderer.get('description')
playlist_id = renderer.get('externalId') playlist_id = renderer.get('externalId')
else:
renderer = try_get( renderer = try_get(
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
if renderer: if renderer:
title = renderer.get('title') title = renderer.get('title')
description = None else:
playlist_id = item_id renderer = try_get(
data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
if renderer:
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
playlist = self.playlist_result( playlist = self.playlist_result(
self._entries(selected_tab, identity_token), self._entries(selected_tab, identity_token),
playlist_id=playlist_id, playlist_title=title, playlist_id=playlist_id, playlist_title=title,