From ee27297f82ccbd702ccd4721d1d3c9d67bbe187e Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 29 May 2022 19:54:22 +1200 Subject: [PATCH] [extractor/youtube] Fix initial player response extraction Authored by: pukkandan, coletdjnz --- yt_dlp/extractor/common.py | 12 +++++-- yt_dlp/extractor/youtube.py | 64 ++++++++++++++++++++++++++----------- 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b24599d5ff..5767662ed5 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1033,11 +1033,19 @@ def _download_json( expected_status=expected_status) return res if res is False else res[0] - def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): + def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, lenient=False): if transform_source: json_string = transform_source(json_string) try: - return json.loads(json_string, strict=False) + try: + return json.loads(json_string, strict=False) + except json.JSONDecodeError as e: + if not lenient: + raise + try: + return json.loads(json_string[:e.pos], strict=False) + except ValueError: + raise e except ValueError as ve: errmsg = '%s: Failed to parse JSON ' % video_id if fatal: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 245778dff2..6d9659b1d7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -397,8 +397,8 @@ def _check_login_required(self): if self._LOGIN_REQUIRED and not self._cookies_passed: self.raise_login_required('Login details are needed to download this content', method='cookies') - _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' - _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' + _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+})\s*;' + _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+})\s*;' _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|