mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-23 11:31:47 +01:00
[YouTube] Bypass age-gating for certain restricted videos
* Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client
* Also add and fix tests
* Introduce and use new utility function `update_url()`
This commit is contained in:
parent
e19ec52322
commit
58988c1421
|
@ -42,6 +42,7 @@ from ..utils import (
|
|||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
|
@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True):
|
||||
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
data.update(query)
|
||||
real_headers = {'content-type': 'application/json'}
|
||||
if headers:
|
||||
real_headers.update(headers)
|
||||
|
||||
return self._download_json(
|
||||
'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
|
||||
note='Downloading API JSON', errnote='Unable to download API page',
|
||||
data=json.dumps(data).encode('utf8'), fatal=fatal,
|
||||
headers={'content-type': 'application/json'},
|
||||
headers=real_headers,
|
||||
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
|
||||
|
||||
def _extract_yt_initial_data(self, video_id, webpage):
|
||||
|
@ -515,6 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'phihag',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
|
||||
'channel': 'Philipp Hagemeister',
|
||||
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
|
||||
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
|
||||
'upload_date': '20121002',
|
||||
|
@ -524,10 +529,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'duration': 10,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
|
||||
'start_time': 1,
|
||||
'end_time': 9,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
|
||||
|
@ -562,7 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'duration': 10,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -621,8 +625,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
|
||||
}
|
||||
},
|
||||
# Normal age-gate video (No vevo, embed allowed), available via embed page
|
||||
# Age-gated videos
|
||||
{
|
||||
'note': 'Age-gated video (No vevo, embed allowed)',
|
||||
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
|
||||
'info_dict': {
|
||||
'id': 'HtVdAasjOgU',
|
||||
|
@ -631,17 +636,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
|
||||
'duration': 142,
|
||||
'uploader': 'The Witcher',
|
||||
'uploader_id': 'WitcherGame',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
|
||||
'upload_date': '20140605',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
|
||||
'age_limit': 18,
|
||||
'categories': ['Gaming'],
|
||||
'tags': 'count:17',
|
||||
'channel': 'The Witcher',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
|
||||
'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Age-gated video only available with authentication (unavailable
|
||||
# via embed page workaround)
|
||||
'note': 'Age-gated video with embed allowed in public site',
|
||||
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
|
||||
'info_dict': {
|
||||
'id': 'HsUATh_Nc2U',
|
||||
'ext': 'mp4',
|
||||
'title': 'Godzilla 2 (Official Video)',
|
||||
'description': 'md5:bf77e03fcae5529475e500129b05668a',
|
||||
'duration': 177,
|
||||
'uploader': 'FlyingKitty',
|
||||
'upload_date': '20200408',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
|
||||
'age_limit': 18,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': ['Flyingkitty', 'godzilla 2'],
|
||||
'channel': 'FlyingKitty',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
|
||||
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'note': 'Age-gated video embedable only with clientScreen=EMBED',
|
||||
'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
|
||||
'info_dict': {
|
||||
'id': 'Tq92D6wQ1mg',
|
||||
'ext': 'mp4',
|
||||
'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
|
||||
'description': 'md5:17eccca93a786d51bc67646756894066',
|
||||
'duration': 106,
|
||||
'uploader': 'Projekt Melody',
|
||||
'upload_date': '20191227',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
|
||||
'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
|
||||
'categories': ['Entertainment'],
|
||||
'channel': 'Projekt Melody',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
|
||||
'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'note': 'Non-Age-gated non-embeddable video',
|
||||
'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
|
||||
'info_dict': {
|
||||
'id': 'MeJVWBSsPAY',
|
||||
'ext': 'mp4',
|
||||
'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
|
||||
'description': 'Fan Video. Music & Lyrics by OOMPH!.',
|
||||
'duration': 210,
|
||||
'uploader': 'Herr Lurik',
|
||||
'uploader_id': 'st3in234',
|
||||
'upload_date': '20130730',
|
||||
'uploader_url': 'http://www.youtube.com/user/st3in234',
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
|
||||
'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
|
||||
'categories': ['Music'],
|
||||
'channel': 'Herr Lurik',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
|
||||
'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
|
||||
'artist': 'OOMPH!',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'note': 'Non-bypassable age-gated video',
|
||||
'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'note': 'Age-gated video only available with authentication (not via embed workaround)',
|
||||
'url': 'XgnwCQzjau8',
|
||||
'only_matching': True,
|
||||
'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
|
||||
},
|
||||
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
|
||||
# YouTube Red ad is not captured for creator
|
||||
|
@ -670,17 +755,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'info_dict': {
|
||||
'id': 'lqQg6PlCWgI',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
|
||||
'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
|
||||
'duration': 6085,
|
||||
'upload_date': '20150827',
|
||||
'uploader_id': 'olympic',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
|
||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
||||
'uploader': 'Olympic',
|
||||
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
|
||||
'uploader': r're:Olympics?',
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
|
||||
'categories': ['Sports'],
|
||||
'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
|
||||
'channel': 'Olympics',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
|
||||
'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires avconv',
|
||||
}
|
||||
},
|
||||
# Non-square pixels
|
||||
{
|
||||
|
@ -1683,27 +1774,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
player_response = self._call_api(
|
||||
'player', {'videoId': video_id}, video_id)
|
||||
|
||||
playability_status = player_response.get('playabilityStatus') or {}
|
||||
if playability_status.get('reason') == 'Sign in to confirm your age':
|
||||
video_info = self._download_webpage(
|
||||
base_url + 'get_video_info', video_id,
|
||||
'Refetching age-gated info webpage',
|
||||
'unable to download video info webpage', query={
|
||||
'video_id': video_id,
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
'html5': 1,
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
|
||||
'c': 'TVHTML5',
|
||||
'cver': '6.20180913',
|
||||
}, fatal=False)
|
||||
if video_info:
|
||||
pr = self._parse_json(
|
||||
try_get(
|
||||
compat_parse_qs(video_info),
|
||||
lambda x: x['player_response'][0], compat_str) or '{}',
|
||||
video_id, fatal=False)
|
||||
if pr and isinstance(pr, dict):
|
||||
player_response = pr
|
||||
def is_agegated(playability):
    """Tell whether a `playabilityStatus` dict describes an age-gated video

    playability: the `playabilityStatus` mapping of a player response
    returns: True if an age-gate marker is found, False if none is found,
             None if `playability` is not a dict
    """
    if not isinstance(playability, dict):
        return

    if playability.get('desktopLegacyAgeGateReason'):
        return True

    # This must be a real list: `filter()` yields a one-shot iterator on
    # Python 3, and the nested generator below walks `reasons` once for
    # each marker, so a lazy iterator would be exhausted after the first
    # marker and the remaining ones would never be compared
    reasons = [
        value for value in (playability.get(r) for r in ('status', 'reason'))
        if value]
    # Substring markers; the comparison is case-sensitive
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
|
||||
|
||||
def get_playability_status(response):
    """Return the `playabilityStatus` dict of `response`, or an empty dict

    An empty dict is returned whenever `try_get` yields a falsy result for
    the `playabilityStatus` key (including an empty dict value).
    """
    status = try_get(response, lambda resp: resp['playabilityStatus'], dict)
    if not status:
        return {}
    return status
|
||||
|
||||
playability_status = get_playability_status(player_response)
|
||||
if (is_agegated(playability_status)
|
||||
and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
|
||||
|
||||
self.report_age_confirmation()
|
||||
|
||||
# Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
|
||||
pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
|
||||
query = {
|
||||
'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}},
|
||||
'contentCheckOk': True,
|
||||
'racyCheckOk': True,
|
||||
'context': {
|
||||
'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
|
||||
'thirdParty': {'embedUrl': 'https://google.com'},
|
||||
},
|
||||
'videoId': video_id,
|
||||
}
|
||||
headers = {
|
||||
'X-YouTube-Client-Name': '85',
|
||||
'X-YouTube-Client-Version': '2.0',
|
||||
'Origin': 'https://www.youtube.com'
|
||||
}
|
||||
|
||||
video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
|
||||
age_gate_status = get_playability_status(video_info)
|
||||
if age_gate_status.get('status') == 'OK':
|
||||
player_response = video_info
|
||||
playability_status = age_gate_status
|
||||
|
||||
trailer_video_id = try_get(
|
||||
playability_status,
|
||||
|
@ -1932,12 +2048,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
for thumbnail in (try_get(
|
||||
container,
|
||||
lambda x: x['thumbnail']['thumbnails'], list) or []):
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
'url': thumbnail_url,
|
||||
'url': update_url(thumbnail_url, query=None, fragment=None),
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
})
|
||||
if thumbnails:
|
||||
|
@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
sbr_tooltip = try_get(
|
||||
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
|
||||
if sbr_tooltip:
|
||||
# however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
|
||||
like_count, dislike_count = sbr_tooltip.split(' / ')
|
||||
info.update({
|
||||
'like_count': str_to_int(like_count),
|
||||
|
@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||
'tags': list,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||
'categories': ['News & Politics'],
|
||||
'tags': list,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||
'categories': ['News & Politics'],
|
||||
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -3043,8 +3157,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
url = compat_urlparse.urlunparse(
|
||||
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
|
||||
url = update_url(url, netloc='www.youtube.com')
|
||||
# Handle both video/playlist URLs
|
||||
qs = parse_qs(url)
|
||||
video_id = qs.get('v', [None])[0]
|
||||
|
@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor):
|
|||
'categories': ['Nonprofits & Activism'],
|
||||
'tags': list,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
|
|
|
@ -4121,6 +4121,17 @@ def update_url_query(url, query):
|
|||
query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
def update_url(url, **kwargs):
    """Return `url` with the components named in kwargs replaced

    url:     compat_str, or an already parsed URL tuple
    kwargs:  URL component names mapped to their new values, as accepted
             by the parsed tuple's `_replace()` (e.g. netloc, query,
             fragment)
    returns: compat_str; a string `url` is returned unchanged when no
             kwargs are given
    """
    parsed = url if isinstance(url, tuple) else None
    if kwargs:
        if parsed is None:
            parsed = compat_urlparse.urlparse(url)
        parsed = parsed._replace(**kwargs)
    elif parsed is None:
        # Nothing to replace and nothing to serialise
        return url
    return compat_urlparse.urlunparse(parsed)
|
||||
|
||||
|
||||
def update_Request(req, url=None, data=None, headers={}, query={}):
|
||||
req_headers = req.headers.copy()
|
||||
req_headers.update(headers)
|
||||
|
|
Loading…
Reference in New Issue
Block a user