From e187799c584cd0feffbd8049ba7c199eb1380f0e Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Fri, 26 Jul 2024 02:36:04 +1200 Subject: [PATCH 1/8] patch from https://github.com/yt-dlp/yt-dlp/issues/10554#issuecomment-2250014807 modified: yt_dlp/extractor/bilibili.py --- yt_dlp/extractor/bilibili.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a84b7a6f7c..8e53f59dc1 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -164,14 +164,12 @@ def _sign_wbi(self, params, video_id): params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() return params - def _download_playinfo(self, bvid, cid, headers=None, qn=None): - params = {'bvid': bvid, 'cid': cid, 'fnval': 4048} - if qn: - params['qn'] = qn + def _download_playinfo(self, bvid, cid, headers=None, **kwargs): + params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **kwargs} return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, - note=f'Downloading video formats for cid {cid} {qn or ""}')['data'] + note=f'Downloading video formats for cid {cid} {kwargs.get("qn", "")}')['data'] def json2srt(self, json_data): srt_data = '' @@ -723,6 +721,7 @@ def _real_extract(self, url): duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), __post_extractor=self.extract_comments(aid)) else: + play_info = self._download_playinfo(video_id, cid, headers=headers, try_look=1) formats = self.extract_formats(play_info) if not traverse_obj(play_info, ('dash')): From 29a59682784daa2143aabdb334ebd3b93eb2205d Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Fri, 26 Jul 2024 03:07:32 +1200 Subject: [PATCH 2/8] - Applied try_look to festival videos - Removed redundant calls to `_download_playinfo` --- yt_dlp/extractor/bilibili.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 8e53f59dc1..b9b0487264 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -645,8 +645,7 @@ def _real_extract(self, url): raise ExtractorError( 'This video may be deleted or geo-restricted. ' 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) - play_info = traverse_obj(play_info_obj, ('data', {dict})) - if not play_info: + if not traverse_obj(play_info_obj, ('data', {dict})): if traverse_obj(play_info_obj, 'code') == 87007: toast = get_element_by_class('tips-toast', webpage) or '' msg = clean_html( @@ -681,11 +680,10 @@ def _real_extract(self, url): aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') + play_info = self._download_playinfo(video_id, cid, headers=headers, try_look=1) festival_info = {} if is_festival: - play_info = self._download_playinfo(video_id, cid, headers=headers) - festival_info = traverse_obj(initial_state, { 'uploader': ('videoInfo', 'upName'), 'uploader_id': ('videoInfo', 'upMid', {str_or_none}), @@ -721,7 +719,6 @@ def _real_extract(self, url): duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), __post_extractor=self.extract_comments(aid)) else: - play_info = self._download_playinfo(video_id, cid, headers=headers, try_look=1) formats = self.extract_formats(play_info) if not traverse_obj(play_info, ('dash')): From b01183f9041420ecc4202ff86d7832ee5c4f0a4e Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:04:18 +1200 Subject: [PATCH 3/8] pops param `try_look` when logged in. --- yt_dlp/extractor/bilibili.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b9b0487264..ec86c5964d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -166,6 +166,8 @@ def _sign_wbi(self, params, video_id): def _download_playinfo(self, bvid, cid, headers=None, **kwargs): params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **kwargs} + if self.is_logged_in: + params.pop('try_look', None) return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, From 90f42036322a229847e35ea0ea15d64489d2775f Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:46:41 +1200 Subject: [PATCH 4/8] keep the original `play_info` traversal --- yt_dlp/extractor/bilibili.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index ec86c5964d..25cc1fc996 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -647,7 +647,8 @@ def _real_extract(self, url): raise ExtractorError( 'This video may be deleted or geo-restricted. ' 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) - if not traverse_obj(play_info_obj, ('data', {dict})): + play_info = traverse_obj(play_info_obj, ('data', {dict})) + if not play_info: if traverse_obj(play_info_obj, 'code') == 87007: toast = get_element_by_class('tips-toast', webpage) or '' msg = clean_html( @@ -682,7 +683,9 @@ def _real_extract(self, url): aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') - play_info = self._download_playinfo(video_id, cid, headers=headers, try_look=1) + if is_festival or not self.is_logged_in: + query = {'try_look': 1} if not self.is_logged_in else {} + play_info = self._download_playinfo(video_id, cid, headers=headers, **query) festival_info = {} if is_festival: From 510e29a42ca09577843834846b07ddf7671c9148 Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Sat, 27 Jul 2024 22:09:44 +1200 Subject: [PATCH 5/8] add support for _get_interactive_entries --- yt_dlp/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 25cc1fc996..bd04b0e48a 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -285,7 +285,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo, headers=None): ('data', 'interaction', 'graph_version', {int_or_none})) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) for cid, edges in cid_edges.items(): - play_info = self._download_playinfo(video_id, cid, headers=headers) + play_info = self._download_playinfo(video_id, cid, headers=headers, try_look=1) yield { **metainfo, 'id': f'{video_id}_{cid}', From b2965fa3b2643f3d87eaad92cfa1fdfe33c67e86 Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Sat, 27 Jul 2024 22:51:15 +1200 Subject: [PATCH 6/8] =?UTF-8?q?[BiliBiliBangumiIE]=20support=20format=2012?= =?UTF-8?q?240(format=20name=20=E6=99=BA=E8=83=BD=E4=BF=AE=E5=A4=8D,=20pre?= =?UTF-8?q?mium=20only)=20[cleanup]code=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yt_dlp/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index bd04b0e48a..6b68592870 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -726,7 +726,7 @@ def _real_extract(self, url): else: formats = self.extract_formats(play_info) - if not traverse_obj(play_info, ('dash')): + if not play_info.get('dash'): # we only have legacy formats and need additional work has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): @@ -858,7 +858,7 @@ def _real_extract(self, url): headers['Referer'] = url play_info = self._download_json( 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, - 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, + 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, headers=headers) premium_only = play_info.get('code') == -10403 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} From d5dbdbccd34e247f52356ab1b70f594b66e21d25 Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Sat, 27 Jul 2024 23:10:53 +1200 Subject: [PATCH 7/8] `_download_playinfo`: more understandable note --- yt_dlp/extractor/bilibili.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 6b68592870..f6391e94ad 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -168,10 +168,14 @@ def _download_playinfo(self, bvid, cid, headers=None, **kwargs): params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **kwargs} if self.is_logged_in: params.pop('try_look', None) + if kwargs.get('qn'): + note = f'Downloading video format {kwargs["qn"]} for cid {cid}' + else: + note = f'Downloading video formats for cid {cid}' + return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, - query=self._sign_wbi(params, bvid), headers=headers, - note=f'Downloading video formats for cid {cid} {kwargs.get("qn", "")}')['data'] + query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] def json2srt(self, json_data): srt_data = '' From 13ed5741689aec27b73cf6a3b762a6d70f9f525b Mon Sep 17 00:00:00 2001 From: grqx_wsl <173253225+grqx@users.noreply.github.com> Date: Tue, 20 Aug 2024 00:46:21 +1200 Subject: [PATCH 8/8] [BiliBiliBangumiIE] supports play_info extraction from webpage - i.e. extracts premiums formats with logged-in cookies, haven't tested with format `12240` yet. * test url: https://www.bilibili.com/bangumi/play/ep829434, cookies: logged-in, non-premium --- yt_dlp/extractor/bilibili.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 39d9e5c5a7..cf6ebc0a5d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -864,10 +864,16 @@ def _real_extract(self, url): self.raise_login_required('This video is for premium members only') headers['Referer'] = url - play_info = self._download_json( - 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, - 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, - headers=headers) + + play_info = self._search_json( + r'playurlSSRData\s*?=\s*?', webpage, 'embedded page info', episode_id, + end_pattern='\n', default=None) + if not play_info: + play_info = self._download_json( + 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, + 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, + headers=headers) + premium_only = play_info.get('code') == -10403 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}