From a29bfa3b0c7ccaad49fb66aa8a94fd7e807fca6b Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 28 Sep 2024 15:43:41 +0000 Subject: [PATCH 01/11] [ie/Fifa] Add extractors --- yt_dlp/extractor/_extractors.py | 8 +- yt_dlp/extractor/fifa.py | 395 +++++++++++++++++++++++++++++--- 2 files changed, 369 insertions(+), 34 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4b1f4c316d..e47318c2ba 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -640,7 +640,13 @@ FC2LiveIE, ) from .fczenit import FczenitIE -from .fifa import FifaIE +from .fifa import ( + FifaArticleIE, + FifaContentIE, + FifaIE, + FifaMovieIE, + FifaSeriesIE, +) from .filmon import ( FilmOnChannelIE, FilmOnIE, diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index ae837f6a02..68ae6206ee 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -1,67 +1,241 @@ +import functools +import json + from .common import InfoExtractor from ..utils import ( + ExtractorError, + determine_ext, int_or_none, - traverse_obj, + parse_iso8601, + strip_or_none, unified_timestamp, + update_url_query, ) +from ..utils.traversal import traverse_obj -class FifaIE(InfoExtractor): - _VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P\w{2})/watch/([^#?]+/)?(?P\w+)' +class FifaContentIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?P\w{2})/content/(?P[\w-]+)/(?P[\w-]+)/?(?:[#?]|$)' + + def _real_initialize(self): + self._HEADERS = { + 'content-type': 'application/json; charset=UTF-8', + 'x-chili-accept-language': 'en', + 'x-chili-api-version': '1.1', + 'x-chili-authenticated': 'false', + 'x-chili-device-id': 'undefined', + 'x-chili-device-profile': 'WEB', + 'x-chili-device-store': 'CHILI', + 'x-chili-user-country': 'US', + 'x-chili-accept-stream-mode': 'multi/codec-compatibility;q=0.8, mono/strict;q=0.7', + 'x-chili-avod-compatibility': 'free,free-ads', + 'x-chili-manifest-properties': 'subtitles', + 'x-chili-streaming-proto': 'https', + } + device_info = self._download_json( + 'https://www.plus.fifa.com/gatekeeper/api/v1/devices/', None, 'Getting device info', + headers=self._HEADERS, + data=json.dumps({ + 'appVersion': '2.6.93', + 'displayName': None, + 'model': 'Chrome', + 'manufacturer': 'Google Inc.', + 'osName': 'Windows', + 'osVersion': '10', + 'platform': 'Chrome', + 'platformVersion': '129.0.0.0', + 'architecture': 'unknown', + 'profile': 'WEB', + 'store': 'CHILI', + 'screenWidth': '1920', + 'screenHeight': '1080', + }).encode()) + self._HEADERS['x-chili-device-id'] = device_info['id'] + + def _call_api(self, path, video_id, note=None, **kwargs): + return self._download_json( + f'https://www.plus.fifa.com/flux-capacitor/api/v1//{path}', video_id, note, **kwargs) + + def _real_extract(self, url): + urlh = self._request_webpage(url, self._match_id(url)) + video_id, display_id, locale = self._match_valid_url(urlh.url).group('id', 'display_id', 'locale') + + video_info = self._call_api( + 'videoasset', video_id, 'Downloading video asset', + headers=self._HEADERS, query={'catalog': video_id})[0] + + formats = [] + subtitles = {} + + for stream_type in [ + 'hls/cbcs+h265.sdr;q=0.9, hls/cbcs+h264;q=0.5, hls/clear+h264;q=0.4, mp4/;q=0.1', + 'mpd/cenc+h264;q=0.9, mpd/clear+h264;q=0.7, mp4/;q=0.1', + ]: + session_info = self._call_api( + 'streaming/session', video_id, 'Getting streaming session', + headers={**self._HEADERS, 'x-chili-accept-stream': stream_type}, + data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode()) + + streams_info = self._call_api( + 'streaming/urls', video_id, 'Getting streaming urls', + headers={**self._HEADERS, 'x-chili-streaming-session': session_info['id']}) + + for playlist_url in traverse_obj(streams_info, (..., 'url')): + ext = determine_ext(playlist_url) + if ext == 'mpd': + fmts, subs = self._extract_mpd_formats_and_subtitles(playlist_url, video_id) + formats.extend(fmts) + self._merge_subtitles(subs, subtitles) + elif ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id, m3u8_id='hls') + formats.extend(fmts) + self._merge_subtitles(subs, subtitles) + else: + raise ExtractorError(f'Unknown playlist URL {playlist_url}', video_id=video_id) + + self._remove_duplicate_formats(formats) + + return { + 'id': video_id, + 'title': video_info['title'], + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': [{ + 'url': update_url_query(x, {'width': 1408}), + 'width': 1408, + } for x in [video_info.get('posterUrl'), video_info.get('wideCoverUrl')] if x], + } + + +class FifaBaseIE(InfoExtractor): + @functools.cached_property + def _preconnect_link(self): + return self._search_regex( + r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', + self._download_webpage('https://fifa.com/', None), 'Preconnect Link') + + def _call_api(self, path, video_id, note=None, **kwargs): + return self._download_json(f'{self._preconnect_link}/{path}', video_id, note, **kwargs) + + +class FifaIE(FifaBaseIE): + _VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P\w{2})/watch/(?P[-\w]+)/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y', 'info_dict': { - 'id': '7on10qPcnyLajDDU3ntg6y', + 'id': 'fee2f7e8-92fa-42c5-805c-a2c949015eae', 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay', - 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', - 'ext': 'mp4', - 'categories': ['FIFA Tournaments'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero', - 'duration': 8165, - 'release_timestamp': 1152403200, - 'release_date': '20060709', + 'display_id': 'italy-v-france-final-2006-fifa-world-cup-germany-full-match-replay', + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', }, - 'params': {'skip_download': 'm3u8'}, + 'params': { + 'skip_download': 'm3u8', + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'Requested format is not available', + 'This video is DRM protected', + ], }, { 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV', 'info_dict': { - 'id': '1cg5r5Qt6Qt12ilkDgb1sV', + 'id': 'd4f4a2cb-5966-4af7-8a05-98ef4732af2b', 'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights', - 'description': 'md5:d908c74ee66322b804ae2e521b02a855', - 'ext': 'mp4', - 'categories': ['FIFA Tournaments', 'Highlights'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB', - 'duration': 902, - 'release_timestamp': 1404777600, - 'release_date': '20140708', + 'display_id': 'brasil-x-alemanha-semifinais-copa-do-mundo-fifa-brasil-2014-compacto', + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', }, - 'params': {'skip_download': 'm3u8'}, + 'params': { + 'skip_download': 'm3u8', + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'Requested format is not available', + 'This video is DRM protected', + ], }, { 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp', 'info_dict': { 'id': '3C6gQH9C2DLwzNx7BMRQdp', - 'title': 'Josimar goal against Northern Ireland | Classic Goals', - 'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b', 'ext': 'mp4', - 'categories': ['FIFA Tournaments', 'Goal'], - 'duration': 28, - 'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR', + 'title': 'Josimar goal against Northern Ireland | Classic Goals', }, 'params': {'skip_download': 'm3u8'}, + 'skip': 'HTTP Error 403: Forbidden', + }, { + 'url': 'https://www.fifa.com/fifaplus/en/watch/2KhLLn6aiGW3nr8sNm8Hkv', + 'info_dict': { + 'id': '2KhLLn6aiGW3nr8sNm8Hkv', + 'ext': 'mp4', + 'title': "Le Sommer: Lyon-Barcelona a beautiful final for women's football", + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'HTTP Error 403: Forbidden', + }, { + 'url': 'https://www.fifa.com/fifaplus/en/watch/4V8H8qv7QM1LNVk5gUwYFa', + 'info_dict': { + 'id': '709abaec-5eef-4ad8-a02d-19a8932f42a2', + 'title': "Christine Sinclair at 19 | FIFA U-19 Women's World Championship Canada 2002™", + 'display_id': 'christine-sinclair-at-19-fifa-u-19-womens-world-championship-canada-2002', + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', + }, + 'params': { + 'skip_download': 'm3u8', + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'Requested format is not available', + 'This video is DRM protected', + ], + }, { + 'url': 'https://www.fifa.com/fifaplus/en/watch/d85632f9-7009-4ea0-aaf1-8d6847e4a148', + 'info_dict': { + 'id': 'bbe5d2a3-3dfd-4283-a1af-3a66022e8254', + 'title': 'Croatia v Australia | Group F | 2006 FIFA World Cup Germany™ | Full Match Replay', + 'display_id': 'croatia-v-australia-or-group-f-or-2006-fifa-world-cup', + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', + }, + 'params': { + 'skip_download': 'm3u8', + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'Requested format is not available', + 'This video is DRM protected', + ], + }, { + 'url': 'https://www.fifa.com/fifaplus/pt/watch/Ny88zzqsVnxCBUJ6fZzPy', + 'info_dict': { + 'id': '3d2612ff-c06f-4a7e-a2d7-ec73504515b5', + 'title': 'The Happiest Man in the World', + 'display_id': 'o-homem-mais-feliz-do-mundo', + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', + }, + 'params': { + 'skip_download': 'm3u8', + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'Requested format is not available', + 'This video is DRM protected', + ], }] def _real_extract(self, url): video_id, locale = self._match_valid_url(url).group('id', 'locale') - webpage = self._download_webpage(url, video_id) - preconnect_link = self._search_regex( - r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + if redirect_url := traverse_obj(self._call_api( + f'pages/{locale}/watch/{video_id}', video_id, 'Downloading redirection info'), 'redirectUrl'): + return self.url_result(redirect_url) + urlh = self._request_webpage(url, self._match_id(url)) + if urlh.url != url: + return self.url_result(urlh.url) - video_details = self._download_json( - f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) + video_details = self._call_api( + f'sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) - preplay_parameters = self._download_json( - f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] + preplay_parameters = self._call_api( + f'videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] content_data = self._download_json( 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters), @@ -80,3 +254,158 @@ def _real_extract(self, url): 'formats': formats, 'subtitles': subtitles, } + + +class FifaArticleIE(FifaBaseIE): + _VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P\w{2})/articles/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://www.fifa.com/en/articles/foord-talks-2023-and-battling-kerr-for-the-wsl-title', + 'info_dict': { + '_type': 'multi_video', + 'id': 'foord-talks-2023-and-battling-kerr-for-the-wsl-title', + 'title': 'Foord talks 2023 and battling Kerr for the WSL title', + 'timestamp': 1651136400, + 'upload_date': '20220428', + }, + 'playlist': [{ + 'info_dict': { + 'id': '6B2xtOT2SDMB4JeF3i9n2y', + 'ext': 'mp4', + 'title': 'Foord & Kerr: Friends and rivals', + 'description': 'md5:756e14e1814196948ec4d2a9663f7214', + 'duration': 82, + 'categories': ['News', 'Interview'], + 'thumbnail': r're:https://digitalhub\.fifa\.com/transform/[^/]+/\w+', + }, + }, { + 'info_dict': { + 'id': 'R2Y1vbwvggrlSr02Cfr99', + 'ext': 'mp4', + 'title': 'Foord: 2023 will be the best Women\'s World Cup yet', + 'description': 'Matildas star Caitlin Foord looks ahead to the FIFA Women\'s World Cup Australia & New Zealand 2023™.', + 'duration': 44, + 'categories': ['News', 'Interview'], + 'thumbnail': r're:https://digitalhub\.fifa\.com/transform/[^/]+/\w+', + }, + }], + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/fifaplus/en/articles/stars-set-to-collide-in-uwcl-final', + 'info_dict': { + '_type': 'multi_video', + 'id': 'stars-set-to-collide-in-uwcl-final', + 'title': 'Stars set to collide in Women’s Champions League final ', + 'timestamp': 1652950800, + 'upload_date': '20220519', + }, + 'playlist_count': 3, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + article_id, locale = self._match_valid_url(url).group('id', 'locale') + + page_id = self._call_api(f'pages/en/articles/{article_id}', article_id)['pageId'] + page_info = self._call_api(f'sections/article/{page_id}', article_id, query={'locale': locale}) + + video_ids = [] + if hero_video_entry_id := page_info.get('heroVideoEntryId'): + video_ids.append(hero_video_entry_id) + + video_ids.extend(traverse_obj(page_info, ( + 'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video', + 'data', 'target', 'sys', 'id'))) + + return self.playlist_from_matches( + [f'https://www.fifa.com/fifaplus/{locale}/watch/{video_id}' for video_id in video_ids], + article_id, page_info.get('articleTitle'), ie=FifaIE, multi_video=True, + timestamp=parse_iso8601(page_info.get('articlePublishedDate'))) + + +class FifaMovieIE(FifaBaseIE): + _VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P\w{2})/watch/movie/(?P\w+)[/?\?\#]?' + _TESTS = [{ + 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/2OFuZ9TGyPH6x7nZsgnVBN', + 'info_dict': { + '_type': 'multi_video', + 'id': '2OFuZ9TGyPH6x7nZsgnVBN', + 'title': 'Bravas de Juárez', + 'description': 'md5:1c36885f34d1c142f66ddd5acd5226b2', + }, + 'playlist_count': 2, + }, { + 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/01ioUo8QHiajSisrvP3ES2', + 'info_dict': { + '_type': 'multi_video', + 'id': '01ioUo8QHiajSisrvP3ES2', + 'title': 'Le Moment | The Official Film of the 2019 FIFA Women’s World Cup™', + 'description': 'md5:fbc803feb6fcbc82d2a73e914244484c', + }, + 'playlist_count': 1, + }, { + 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/69GbI9lVcwhOeBvea5eKUB', + 'info_dict': { + '_type': 'multi_video', + 'id': '69GbI9lVcwhOeBvea5eKUB', + 'title': 'Dreams | The Official Film of the 2018 FIFA World Cup™', + 'description': 'md5:e79dd17af4dcab1dd446ef6e22a79330', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + movie_id, locale = self._match_valid_url(url).group('id', 'locale') + + movie_details = self._call_api( + f'sections/movieDetails/{movie_id}', movie_id, 'Downloading Movie Details', query={'locale': locale}) + + video_ids = traverse_obj(movie_details, ('trailers', ..., 'entryId')) + if video_entry_id := traverse_obj(movie_details, ('video', 'videoEntryId')): + video_ids.append(video_entry_id) + + return self.playlist_from_matches( + [f'https://www.fifa.com/fifaplus/{locale}/watch/{video_id}' for video_id in video_ids], + movie_id, traverse_obj(movie_details, ('video', 'title')), ie=FifaIE, multi_video=True, + playlist_description=traverse_obj(movie_details, ('video', 'description'))) + + +class FifaSeriesIE(FifaBaseIE): + _VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P\w{2})/watch/series/(?P\w+)/(?P\w+)/(?P\w+)[/?\?\#]?' + _TESTS = [{ + 'url': 'https://www.fifa.com/fifaplus/en/watch/series/48PQFX2J4TiDJcxWOxUPho/2ka5yomq8MBvfxe205zdQ9/6H72309PLWXafBIavvPzPQ#ReadMore', + 'info_dict': { + '_type': 'multi_video', + 'id': '48PQFX2J4TiDJcxWOxUPho', + 'title': 'Episode 1 | Kariobangi', + 'description': 'md5:ecbc8668f828d3cc2c0d00edcc0af04f', + }, + 'playlist_count': 4, + }, { + 'url': 'https://www.fifa.com/fifaplus/en/watch/series/5Ja1dDLuudkFF95OVHcYBG/5epcWav73zMbjTJh2RxIOt/1NIHdDxPlYodbNobjS1iX5', + 'info_dict': { + '_type': 'multi_video', + 'id': '5Ja1dDLuudkFF95OVHcYBG', + 'title': 'Paul Pogba and Aaron Wan Bissaka | HD Cutz', + 'description': 'md5:16dc373774f503ef91f4489ca17c3f49', + }, + 'playlist_count': 10, + }] + + def _real_extract(self, url): + series_id, locale, season_id, episode_id = self._match_valid_url(url).group('serie_id', 'locale', 'season_id', 'episode_id') + + serie_details = self._call_api( + 'sections/videoEpisodeDetails', series_id, 'Downloading Serie Details', query={ + 'locale': locale, + 'seriesId': series_id, + 'seasonId': season_id, + 'episodeId': episode_id, + }) + + video_ids = traverse_obj(serie_details, ('seasons', ..., 'episodes', ..., 'entryId')) + video_ids.extend(traverse_obj(serie_details, ('trailers', ..., 'entryId'))) + + return self.playlist_from_matches( + [f'https://www.fifa.com/fifaplus/{locale}/watch/{video_id}' for video_id in video_ids], + series_id, strip_or_none(serie_details.get('title')), ie=FifaIE, multi_video=True, + playlist_description=strip_or_none(serie_details.get('description'))) From 4e649c6a2814ef81084b0c9d883304ca1536e218 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 13 Oct 2024 10:26:00 +0000 Subject: [PATCH 02/11] use "getter" in playlist_from_matches() --- yt_dlp/extractor/fifa.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index 68ae6206ee..9540b46531 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -317,9 +317,9 @@ def _real_extract(self, url): 'data', 'target', 'sys', 'id'))) return self.playlist_from_matches( - [f'https://www.fifa.com/fifaplus/{locale}/watch/{video_id}' for video_id in video_ids], - article_id, page_info.get('articleTitle'), ie=FifaIE, multi_video=True, - timestamp=parse_iso8601(page_info.get('articlePublishedDate'))) + video_ids, article_id, page_info.get('articleTitle'), + getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}', + ie=FifaIE, multi_video=True, timestamp=parse_iso8601(page_info.get('articlePublishedDate'))) class FifaMovieIE(FifaBaseIE): @@ -364,9 +364,9 @@ def _real_extract(self, url): video_ids.append(video_entry_id) return self.playlist_from_matches( - [f'https://www.fifa.com/fifaplus/{locale}/watch/{video_id}' for video_id in video_ids], - movie_id, traverse_obj(movie_details, ('video', 'title')), ie=FifaIE, multi_video=True, - playlist_description=traverse_obj(movie_details, ('video', 'description'))) + video_ids, movie_id, traverse_obj(movie_details, ('video', 'title')), + getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}', + ie=FifaIE, multi_video=True, playlist_description=traverse_obj(movie_details, ('video', 'description'))) class FifaSeriesIE(FifaBaseIE): @@ -406,6 +406,6 @@ def _real_extract(self, url): video_ids.extend(traverse_obj(serie_details, ('trailers', ..., 'entryId'))) return self.playlist_from_matches( - [f'https://www.fifa.com/fifaplus/{locale}/watch/{video_id}' for video_id in video_ids], - series_id, strip_or_none(serie_details.get('title')), ie=FifaIE, multi_video=True, - playlist_description=strip_or_none(serie_details.get('description'))) + video_ids, series_id, strip_or_none(serie_details.get('title')), + getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}', + ie=FifaIE, multi_video=True, playlist_description=strip_or_none(serie_details.get('description'))) From bb98db29a109ae5783ee0bf41b8ef81da317df5b Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:03:03 +0000 Subject: [PATCH 03/11] simplify _call_api --- yt_dlp/extractor/fifa.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index 9540b46531..b79a17952c 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -52,17 +52,19 @@ def _real_initialize(self): }).encode()) self._HEADERS['x-chili-device-id'] = device_info['id'] - def _call_api(self, path, video_id, note=None, **kwargs): + def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None): return self._download_json( - f'https://www.plus.fifa.com/flux-capacitor/api/v1//{path}', video_id, note, **kwargs) + f'https://www.plus.fifa.com/flux-capacitor/api/v1//{path}', video_id, note, headers={ + **self._HEADERS, + **(headers or {}), + }, query=query, data=data) def _real_extract(self, url): urlh = self._request_webpage(url, self._match_id(url)) video_id, display_id, locale = self._match_valid_url(urlh.url).group('id', 'display_id', 'locale') video_info = self._call_api( - 'videoasset', video_id, 'Downloading video asset', - headers=self._HEADERS, query={'catalog': video_id})[0] + 'videoasset', video_id, 'Downloading video asset', query={'catalog': video_id})[0] formats = [] subtitles = {} @@ -73,12 +75,12 @@ def _real_extract(self, url): ]: session_info = self._call_api( 'streaming/session', video_id, 'Getting streaming session', - headers={**self._HEADERS, 'x-chili-accept-stream': stream_type}, + headers={'x-chili-accept-stream': stream_type}, data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode()) streams_info = self._call_api( 'streaming/urls', video_id, 'Getting streaming urls', - headers={**self._HEADERS, 'x-chili-streaming-session': session_info['id']}) + headers={'x-chili-streaming-session': session_info['id']}) for playlist_url in traverse_obj(streams_info, (..., 'url')): ext = determine_ext(playlist_url) @@ -115,8 +117,9 @@ def _preconnect_link(self): r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', self._download_webpage('https://fifa.com/', None), 'Preconnect Link') - def _call_api(self, path, video_id, note=None, **kwargs): - return self._download_json(f'{self._preconnect_link}/{path}', video_id, note, **kwargs) + def _call_api(self, path, video_id, note=None, query=None): + return self._download_json( + f'{self._preconnect_link}/{path}', video_id, note, query=query) class FifaIE(FifaBaseIE): From 3585397c7f85ea0f2a6708a9543d0ef2c2bfe7b0 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:07:26 +0000 Subject: [PATCH 04/11] add fatal to _call_api --- yt_dlp/extractor/fifa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index b79a17952c..967574475a 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -117,9 +117,9 @@ def _preconnect_link(self): r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', self._download_webpage('https://fifa.com/', None), 'Preconnect Link') - def _call_api(self, path, video_id, note=None, query=None): + def _call_api(self, path, video_id, note=None, query=None, fatal=True): return self._download_json( - f'{self._preconnect_link}/{path}', video_id, note, query=query) + f'{self._preconnect_link}/{path}', video_id, note, query=query, fatal=fatal) class FifaIE(FifaBaseIE): From ac5c846921ad82b6aa3031a75c77cce3c3e6a602 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Mon, 28 Oct 2024 00:31:55 +0000 Subject: [PATCH 05/11] fix usage of self._merge_subtitles --- yt_dlp/extractor/fifa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index 967574475a..d1eab97630 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -87,11 +87,11 @@ def _real_extract(self, url): if ext == 'mpd': fmts, subs = self._extract_mpd_formats_and_subtitles(playlist_url, video_id) formats.extend(fmts) - self._merge_subtitles(subs, subtitles) + self._merge_subtitles(subs, target=subtitles) elif ext == 'm3u8': fmts, subs = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id, m3u8_id='hls') formats.extend(fmts) - self._merge_subtitles(subs, subtitles) + self._merge_subtitles(subs, target=subtitles) else: raise ExtractorError(f'Unknown playlist URL {playlist_url}', video_id=video_id) From 5f6a442047b6a129a1646c6f51bbe40f18430ded Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 30 Oct 2024 16:04:00 +0000 Subject: [PATCH 06/11] Rewrite code; DRM content is being only matching --- yt_dlp/extractor/_extractors.py | 4 +- yt_dlp/extractor/fifa.py | 422 +++++++++++++------------------- 2 files changed, 175 insertions(+), 251 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 8f79d38016..5515750bfb 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -646,9 +646,7 @@ from .fifa import ( FifaArticleIE, FifaContentIE, - FifaIE, - FifaMovieIE, - FifaSeriesIE, + FifaPlayerIE, ) from .filmon import ( FilmOnChannelIE, diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index d1eab97630..7ee39b5f86 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -5,6 +5,7 @@ from ..utils import ( ExtractorError, determine_ext, + float_or_none, int_or_none, parse_iso8601, strip_or_none, @@ -14,9 +15,7 @@ from ..utils.traversal import traverse_obj -class FifaContentIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?P\w{2})/content/(?P[\w-]+)/(?P[\w-]+)/?(?:[#?]|$)' - +class FifaBaseIE(InfoExtractor): def _real_initialize(self): self._HEADERS = { 'content-type': 'application/json; charset=UTF-8', @@ -54,18 +53,12 @@ def _real_initialize(self): def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None): return self._download_json( - f'https://www.plus.fifa.com/flux-capacitor/api/v1//{path}', video_id, note, headers={ + f'https://www.plus.fifa.com/{path}', video_id, note, headers={ **self._HEADERS, **(headers or {}), }, query=query, data=data) - def _real_extract(self, url): - urlh = self._request_webpage(url, self._match_id(url)) - video_id, display_id, locale = self._match_valid_url(urlh.url).group('id', 'display_id', 'locale') - - video_info = self._call_api( - 'videoasset', video_id, 'Downloading video asset', query={'catalog': video_id})[0] - + def _extract_video(self, video_info, video_id): formats = [] subtitles = {} @@ -74,12 +67,11 @@ def _real_extract(self, url): 'mpd/cenc+h264;q=0.9, mpd/clear+h264;q=0.7, mp4/;q=0.1', ]: session_info = self._call_api( - 'streaming/session', video_id, 'Getting streaming session', + 'flux-capacitor/api/v1/streaming/session', video_id, 'Getting streaming session', headers={'x-chili-accept-stream': stream_type}, data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode()) - streams_info = self._call_api( - 'streaming/urls', video_id, 'Getting streaming urls', + 'flux-capacitor/api/v1/streaming/urls', video_id, 'Getting streaming urls', headers={'x-chili-streaming-session': session_info['id']}) for playlist_url in traverse_obj(streams_info, (..., 'url')): @@ -99,10 +91,11 @@ def _real_extract(self, url): return { 'id': video_id, - 'title': video_info['title'], - 'display_id': display_id, + 'title': strip_or_none(video_info['title']), + 'duration': float_or_none(video_info.get('duration'), scale=1000), 'formats': formats, 'subtitles': subtitles, + 'age_limit': traverse_obj(video_info, ('parental', 'age', {int_or_none})), 'thumbnails': [{ 'url': update_url_query(x, {'width': 1408}), 'width': 1408, @@ -110,161 +103,155 @@ def _real_extract(self, url): } -class FifaBaseIE(InfoExtractor): - @functools.cached_property - def _preconnect_link(self): - return self._search_regex( - r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', - self._download_webpage('https://fifa.com/', None), 'Preconnect Link') - - def _call_api(self, path, video_id, note=None, query=None, fatal=True): - return self._download_json( - f'{self._preconnect_link}/{path}', video_id, note, query=query, fatal=fatal) - - -class FifaIE(FifaBaseIE): - _VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P\w{2})/watch/(?P[-\w]+)/?(?:[#?]|$)' +class FifaPlayerIE(FifaBaseIE): + _VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/player/(?P[\w-]+)/?\?(?:[^#]+&)?catalogId=(?P[\w-]+)' _TESTS = [{ - 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y', + 'url': 'https://www.plus.fifa.com/en/player/f67b9d46-38c3-4e38-bbf3-89cf14cbcc1a?catalogId=b9c32230-1426-46d0-8448-ca824ae48603&entryPoint=Slider', 'info_dict': { - 'id': 'fee2f7e8-92fa-42c5-805c-a2c949015eae', - 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay', - 'display_id': 'italy-v-france-final-2006-fifa-world-cup-germany-full-match-replay', - 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', - }, - 'params': { - 'skip_download': 'm3u8', - 'ignore_no_formats_error': True, - }, - 'expected_warnings': [ - 'Requested format is not available', - 'This video is DRM protected', - ], - }, { - 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV', - 'info_dict': { - 'id': 'd4f4a2cb-5966-4af7-8a05-98ef4732af2b', - 'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights', - 'display_id': 'brasil-x-alemanha-semifinais-copa-do-mundo-fifa-brasil-2014-compacto', - 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', - }, - 'params': { - 'skip_download': 'm3u8', - 'ignore_no_formats_error': True, - }, - 'expected_warnings': [ - 'Requested format is not available', - 'This video is DRM protected', - ], - }, { - 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp', - 'info_dict': { - 'id': '3C6gQH9C2DLwzNx7BMRQdp', + 'id': 'f67b9d46-38c3-4e38-bbf3-89cf14cbcc1a', 'ext': 'mp4', - 'title': 'Josimar goal against Northern Ireland | Classic Goals', + 'title': 'Trailer | HD Cutz', + 'age_limit': 0, + 'duration': 195.84, + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', }, 'params': {'skip_download': 'm3u8'}, - 'skip': 'HTTP Error 403: Forbidden', }, { - 'url': 'https://www.fifa.com/fifaplus/en/watch/2KhLLn6aiGW3nr8sNm8Hkv', + 'url': 'https://www.plus.fifa.com/en/player/af65939f-bbce-4b8f-8462-5140af533c5f?catalogId=fac6685c-a900-4e78-b5cd-192af5131ffe&entryPoint=Slider', + 'md5': '2c4f5c591448d372f6ba85b8f3be37df', 'info_dict': { - 'id': '2KhLLn6aiGW3nr8sNm8Hkv', + 'id': 'af65939f-bbce-4b8f-8462-5140af533c5f', 'ext': 'mp4', - 'title': "Le Sommer: Lyon-Barcelona a beautiful final for women's football", - }, - 'params': {'skip_download': 'm3u8'}, - 'skip': 'HTTP Error 403: Forbidden', - }, { - 'url': 'https://www.fifa.com/fifaplus/en/watch/4V8H8qv7QM1LNVk5gUwYFa', - 'info_dict': { - 'id': '709abaec-5eef-4ad8-a02d-19a8932f42a2', - 'title': "Christine Sinclair at 19 | FIFA U-19 Women's World Championship Canada 2002™", - 'display_id': 'christine-sinclair-at-19-fifa-u-19-womens-world-championship-canada-2002', + 'title': 'Trailer | Bravas de Juárez', + 'age_limit': 0, + 'duration': 73.984, 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', }, - 'params': { - 'skip_download': 'm3u8', - 'ignore_no_formats_error': True, - }, - 'expected_warnings': [ - 'Requested format is not available', - 'This video is DRM protected', - ], }, { - 'url': 'https://www.fifa.com/fifaplus/en/watch/d85632f9-7009-4ea0-aaf1-8d6847e4a148', - 'info_dict': { - 'id': 'bbe5d2a3-3dfd-4283-a1af-3a66022e8254', - 'title': 'Croatia v Australia | Group F | 2006 FIFA World Cup Germany™ | Full Match Replay', - 'display_id': 'croatia-v-australia-or-group-f-or-2006-fifa-world-cup', - 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', - }, - 'params': { - 'skip_download': 'm3u8', - 'ignore_no_formats_error': True, - }, - 'expected_warnings': [ - 'Requested format is not available', - 'This video is DRM protected', - ], - }, { - 'url': 'https://www.fifa.com/fifaplus/pt/watch/Ny88zzqsVnxCBUJ6fZzPy', - 'info_dict': { - 'id': '3d2612ff-c06f-4a7e-a2d7-ec73504515b5', - 'title': 'The Happiest Man in the World', - 'display_id': 'o-homem-mais-feliz-do-mundo', - 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', - }, - 'params': { - 'skip_download': 'm3u8', - 'ignore_no_formats_error': True, - }, - 'expected_warnings': [ - 'Requested format is not available', - 'This video is DRM protected', - ], + 'url': 'https://plus.fifa.com/en/player/eeebdd38-5d51-4891-8307-ab5dd62c2c32?catalogId=ed3b2dcb-6886-4b34-8ba7-c8800027f7dd', + 'only_matching': True, }] def _real_extract(self, url): - video_id, locale = self._match_valid_url(url).group('id', 'locale') + video_id, catelog_id = self._match_valid_url(url).group('id', 'display_id') + video_asset = self._call_api( + 'flux-capacitor/api/v1/videoasset', video_id, + 'Downloading video asset', query={'catalog': catelog_id}) + video_info = traverse_obj(video_asset, (lambda _, v: v['id'] == video_id), get_all=False) + if not video_info: + raise ExtractorError('Unable to extract video info') + return self._extract_video(video_info, video_id) - if redirect_url := traverse_obj(self._call_api( - f'pages/{locale}/watch/{video_id}', video_id, 'Downloading redirection info'), 'redirectUrl'): - return self.url_result(redirect_url) + +class FifaContentIE(FifaBaseIE): + _VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/content/(?P[\w-]+)/(?P[\w-]+)/?(?:[#?]|$)' + _TESTS = [{ + # from https://www.fifa.com/fifaplus/en/watch/series/48PQFX2J4TiDJcxWOxUPho/2ka5yomq8MBvfxe205zdQ9/6H72309PLWXafBIavvPzPQ#ReadMore + 'url': 'https://www.plus.fifa.com/en/content/kariobangi/6f3be63f-76d9-4290-9e60-fd62afa95ed7', + 'info_dict': { + 'id': '6f3be63f-76d9-4290-9e60-fd62afa95ed7', + 'title': 'Kariobangi', + 'description': 'md5:b57eb012db2b84d482adedda82faf1c8', + 'display_id': 'kariobangi', + 'thumbnails': 'count:2', + }, + 'playlist_count': 0, + }, { + # from https://www.fifa.com/fifaplus/en/watch/series/5Ja1dDLuudkFF95OVHcYBG/5epcWav73zMbjTJh2RxIOt/1NIHdDxPlYodbNobjS1iX5 + 'url': 'https://www.plus.fifa.com/en/content/hd-cutz/b9c32230-1426-46d0-8448-ca824ae48603', + 'info_dict': { + 'id': 'b9c32230-1426-46d0-8448-ca824ae48603', + 'title': 'HD Cutz', + 'description': 'md5:86dd1e6d9b4463b3ccc2063ab3180c44', + 'display_id': 'hd-cutz', + 'thumbnails': 'count:2', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'b9c32230-1426-46d0-8448-ca824ae48603', + 'ext': 'mp4', + 'title': 'Trailer | HD Cutz', + 'age_limit': 0, + 'duration': 195.840, + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', + }, + }], + 'params': {'skip_download': 'm3u8'}, + }, { + # from https://www.fifa.com/fifaplus/en/watch/movie/2OFuZ9TGyPH6x7nZsgnVBN + 'url': 'https://www.plus.fifa.com/en/content/bravas-de-juarez/fac6685c-a900-4e78-b5cd-192af5131ffe', + 'info_dict': { + 'id': 'fac6685c-a900-4e78-b5cd-192af5131ffe', + 'title': 'Bravas de Juárez', + 'description': 'md5:e48e0f56fb27ac334e616976e0e62362', + 'display_id': 'bravas-de-juarez', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'fac6685c-a900-4e78-b5cd-192af5131ffe', + 'ext': 'mp4', + 'title': 'Trailer | Bravas de Juárez', + 'age_limit': 0, + 'duration': 73.984, + 'thumbnail': r're:https://cdn\.plus\.fifa\.com//images/public/cms/[/\w-]+\.jpg\?width=1408', + }, + }], + }] + _WEBPAGE_TESTS = [{ + # https://www.plus.fifa.com/en/content/le-moment-the-official-film-of-the-2019-fifa-womens-world-cup/68a89002-0182-4cc7-b858-e548de0fb9cc + 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/01ioUo8QHiajSisrvP3ES2', + 'info_dict': { + 'id': '68a89002-0182-4cc7-b858-e548de0fb9cc', + 'title': 'Le Moment', + 'description': 'md5:155f0c28ea9de733668d7eb1f7dbcb52', + 'display_id': 'le-moment-the-official-film-of-the-2019-fifa-womens-world-cup', + }, + 'playlist_count': 0, + }, { + # https://www.plus.fifa.com/en/content/dreams-2018-fifa-world-cup-official-film/ebdce1da-ab82-4c0b-a7d3-b4fc71030339 + 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/69GbI9lVcwhOeBvea5eKUB', + 'info_dict': { + 'id': 'ebdce1da-ab82-4c0b-a7d3-b4fc71030339', + 'title': 'Dreams', + 'description': 'md5:b795d218d5c2b88bff3c1569cb617acb', + 'display_id': 'dreams-2018-fifa-world-cup-official-film', + }, + 'playlist_count': 0, + }] + + def _entries(self, video_asset, video_id): + for video_info in traverse_obj(video_asset, (lambda _, v: v['type'] == 'TRAILER', {dict})): + yield self._extract_video(video_info, video_id) + + def _real_extract(self, url): urlh = self._request_webpage(url, self._match_id(url)) - if urlh.url != url: - return self.url_result(urlh.url) + video_id, display_id = self._match_valid_url(urlh.url).group('id', 'display_id') - video_details = self._call_api( - f'sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) + video_content = self._call_api( + f'entertainment/api/v1/contents/{video_id}', video_id, 'Downloading video content') + video_asset = self._call_api( + 'flux-capacitor/api/v1/videoasset', video_id, + 'Downloading video asset', query={'catalog': video_id}) - preplay_parameters = self._call_api( - f'videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] + thumbnails = [] + for key, width in [('coverUrl', 330), ('wideCoverUrl', 1408)]: + if thumbnail_url := video_content.get(key): + thumbnails.append({ + 'url': update_url_query(thumbnail_url, {'width': width}), + 'width': width, + }) - content_data = self._download_json( - 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters), - video_id, 'Downloading Content Data') - - formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) - - return { - 'id': video_id, - 'title': video_details.get('title'), - 'description': video_details.get('description'), - 'duration': int_or_none(video_details.get('duration')), - 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')), - 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)), - 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')), - 'formats': formats, - 'subtitles': subtitles, - } + return self.playlist_result( + self._entries(video_asset, video_id), video_id, + strip_or_none(video_content['title']), strip_or_none(video_content.get('storyLine')), + display_id=display_id, thumbnails=thumbnails) -class FifaArticleIE(FifaBaseIE): +class FifaArticleIE(InfoExtractor): _VALID_URL = r'https?://(www\.)?fifa\.com/(fifaplus/)?(?P\w{2})/articles/(?P[\w-]+)' _TESTS = [{ 'url': 'https://www.fifa.com/en/articles/foord-talks-2023-and-battling-kerr-for-the-wsl-title', 'info_dict': { - '_type': 'multi_video', 'id': 'foord-talks-2023-and-battling-kerr-for-the-wsl-title', 'title': 'Foord talks 2023 and battling Kerr for the WSL title', 'timestamp': 1651136400, @@ -293,18 +280,47 @@ class FifaArticleIE(FifaBaseIE): }], 'params': {'skip_download': 'm3u8'}, }, { + # https://www.fifa.com/en/articles/stars-set-to-collide-in-uwcl-final 'url': 'https://www.fifa.com/fifaplus/en/articles/stars-set-to-collide-in-uwcl-final', - 'info_dict': { - '_type': 'multi_video', - 'id': 'stars-set-to-collide-in-uwcl-final', - 'title': 'Stars set to collide in Women’s Champions League final ', - 'timestamp': 1652950800, - 'upload_date': '20220519', - }, - 'playlist_count': 3, - 'params': {'skip_download': 'm3u8'}, + 'only_matching': True, }] + @functools.cached_property + def _preconnect_link(self): + return self._search_regex( + r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', + self._download_webpage('https://fifa.com/', None), 'Preconnect Link') + + def _call_api(self, path, video_id, note=None, query=None, fatal=True): + return self._download_json( + f'{self._preconnect_link}/{path}', video_id, note, query=query, fatal=fatal) + + def _entries(self, video_ids, article_id): + for video_id in video_ids: + video_details = self._call_api( + f'sections/videoDetails/{video_id}', article_id, + 'Downloading Video Details', fatal=False) + + preplay_parameters = self._call_api( + f'videoPlayerData/{video_id}', article_id, + 'Downloading Preplay Parameters')['preplayParameters'] + content_data = self._download_json( + 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format( + **preplay_parameters), article_id, 'Downloading Content Data') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], article_id) + + yield { + 'id': video_id, + 'title': video_details.get('title'), + 'description': video_details.get('description'), + 'duration': int_or_none(video_details.get('duration')), + 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')), + 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)), + 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')), + 'formats': formats, + 'subtitles': subtitles, + } + def _real_extract(self, url): article_id, locale = self._match_valid_url(url).group('id', 'locale') @@ -319,96 +335,6 @@ def _real_extract(self, url): 'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video', 'data', 'target', 'sys', 'id'))) - return self.playlist_from_matches( - video_ids, article_id, page_info.get('articleTitle'), - getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}', - ie=FifaIE, multi_video=True, timestamp=parse_iso8601(page_info.get('articlePublishedDate'))) - - -class FifaMovieIE(FifaBaseIE): - _VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P\w{2})/watch/movie/(?P\w+)[/?\?\#]?' - _TESTS = [{ - 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/2OFuZ9TGyPH6x7nZsgnVBN', - 'info_dict': { - '_type': 'multi_video', - 'id': '2OFuZ9TGyPH6x7nZsgnVBN', - 'title': 'Bravas de Juárez', - 'description': 'md5:1c36885f34d1c142f66ddd5acd5226b2', - }, - 'playlist_count': 2, - }, { - 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/01ioUo8QHiajSisrvP3ES2', - 'info_dict': { - '_type': 'multi_video', - 'id': '01ioUo8QHiajSisrvP3ES2', - 'title': 'Le Moment | The Official Film of the 2019 FIFA Women’s World Cup™', - 'description': 'md5:fbc803feb6fcbc82d2a73e914244484c', - }, - 'playlist_count': 1, - }, { - 'url': 'https://www.fifa.com/fifaplus/en/watch/movie/69GbI9lVcwhOeBvea5eKUB', - 'info_dict': { - '_type': 'multi_video', - 'id': '69GbI9lVcwhOeBvea5eKUB', - 'title': 'Dreams | The Official Film of the 2018 FIFA World Cup™', - 'description': 'md5:e79dd17af4dcab1dd446ef6e22a79330', - }, - 'playlist_count': 1, - }] - - def _real_extract(self, url): - movie_id, locale = self._match_valid_url(url).group('id', 'locale') - - movie_details = self._call_api( - f'sections/movieDetails/{movie_id}', movie_id, 'Downloading Movie Details', query={'locale': locale}) - - video_ids = traverse_obj(movie_details, ('trailers', ..., 'entryId')) - if video_entry_id := traverse_obj(movie_details, ('video', 'videoEntryId')): - video_ids.append(video_entry_id) - - return self.playlist_from_matches( - video_ids, movie_id, traverse_obj(movie_details, ('video', 'title')), - getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}', - ie=FifaIE, multi_video=True, playlist_description=traverse_obj(movie_details, ('video', 'description'))) - - -class FifaSeriesIE(FifaBaseIE): - _VALID_URL = r'https?://(www\.)?fifa\.com/fifaplus/(?P\w{2})/watch/series/(?P\w+)/(?P\w+)/(?P\w+)[/?\?\#]?' - _TESTS = [{ - 'url': 'https://www.fifa.com/fifaplus/en/watch/series/48PQFX2J4TiDJcxWOxUPho/2ka5yomq8MBvfxe205zdQ9/6H72309PLWXafBIavvPzPQ#ReadMore', - 'info_dict': { - '_type': 'multi_video', - 'id': '48PQFX2J4TiDJcxWOxUPho', - 'title': 'Episode 1 | Kariobangi', - 'description': 'md5:ecbc8668f828d3cc2c0d00edcc0af04f', - }, - 'playlist_count': 4, - }, { - 'url': 'https://www.fifa.com/fifaplus/en/watch/series/5Ja1dDLuudkFF95OVHcYBG/5epcWav73zMbjTJh2RxIOt/1NIHdDxPlYodbNobjS1iX5', - 'info_dict': { - '_type': 'multi_video', - 'id': '5Ja1dDLuudkFF95OVHcYBG', - 'title': 'Paul Pogba and Aaron Wan Bissaka | HD Cutz', - 'description': 'md5:16dc373774f503ef91f4489ca17c3f49', - }, - 'playlist_count': 10, - }] - - def _real_extract(self, url): - series_id, locale, season_id, episode_id = self._match_valid_url(url).group('serie_id', 'locale', 'season_id', 'episode_id') - - serie_details = self._call_api( - 'sections/videoEpisodeDetails', series_id, 'Downloading Serie Details', query={ - 'locale': locale, - 'seriesId': series_id, - 'seasonId': season_id, - 'episodeId': episode_id, - }) - - video_ids = traverse_obj(serie_details, ('seasons', ..., 'episodes', ..., 'entryId')) - video_ids.extend(traverse_obj(serie_details, ('trailers', ..., 'entryId'))) - - return self.playlist_from_matches( - video_ids, series_id, strip_or_none(serie_details.get('title')), - getter=lambda x: f'https://www.fifa.com/fifaplus/{locale}/watch/{x}', - ie=FifaIE, multi_video=True, playlist_description=strip_or_none(serie_details.get('description'))) + return self.playlist_result( + self._entries(video_ids, article_id), article_id, page_info.get('articleTitle'), + timestamp=parse_iso8601(page_info.get('articlePublishedDate'))) From 77debf548f1aac95265ebe51e31a71522442d31e Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 30 Oct 2024 17:42:19 +0000 Subject: [PATCH 07/11] FifaContent: no positive redirection detection --- yt_dlp/extractor/fifa.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index 7ee39b5f86..dc6bf082a6 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -224,8 +224,7 @@ def _entries(self, video_asset, video_id): yield self._extract_video(video_info, video_id) def _real_extract(self, url): - urlh = self._request_webpage(url, self._match_id(url)) - video_id, display_id = self._match_valid_url(urlh.url).group('id', 'display_id') + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') video_content = self._call_api( f'entertainment/api/v1/contents/{video_id}', video_id, 'Downloading video content') From 81075d168e4fb101cd5e5607465175d55e7ad191 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:02:10 +0000 Subject: [PATCH 08/11] fix typo; compat code --- yt_dlp/extractor/fifa.py | 55 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index dc6bf082a6..ebb9ade261 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -16,24 +16,31 @@ class FifaBaseIE(InfoExtractor): + _HEADERS = { + 'content-type': 'application/json; charset=UTF-8', + 'x-chili-accept-language': 'en', + 'x-chili-api-version': '1.1', + 'x-chili-authenticated': 'false', + 'x-chili-device-id': 'undefined', + 'x-chili-device-profile': 'WEB', + 'x-chili-device-store': 'CHILI', + 'x-chili-user-country': 'US', + 'x-chili-accept-stream-mode': 'multi/codec-compatibility;q=0.8, mono/strict;q=0.7', + 'x-chili-avod-compatibility': 'free,free-ads', + 'x-chili-manifest-properties': 'subtitles', + 'x-chili-streaming-proto': 'https', + } + + def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None): + return self._download_json( + f'https://www.plus.fifa.com/{path}', video_id, note, headers={ + **self._HEADERS, + **(headers or {}), + }, query=query, data=data) + def _real_initialize(self): - self._HEADERS = { - 'content-type': 'application/json; charset=UTF-8', - 'x-chili-accept-language': 'en', - 'x-chili-api-version': '1.1', - 'x-chili-authenticated': 'false', - 'x-chili-device-id': 'undefined', - 'x-chili-device-profile': 'WEB', - 'x-chili-device-store': 'CHILI', - 'x-chili-user-country': 'US', - 'x-chili-accept-stream-mode': 'multi/codec-compatibility;q=0.8, mono/strict;q=0.7', - 'x-chili-avod-compatibility': 'free,free-ads', - 'x-chili-manifest-properties': 'subtitles', - 'x-chili-streaming-proto': 'https', - } - device_info = self._download_json( - 'https://www.plus.fifa.com/gatekeeper/api/v1/devices/', None, 'Getting device info', - headers=self._HEADERS, + device_info = self._call_api( + 'gatekeeper/api/v1/devices/', None, 'Getting device info', data=json.dumps({ 'appVersion': '2.6.93', 'displayName': None, @@ -51,13 +58,6 @@ def _real_initialize(self): }).encode()) self._HEADERS['x-chili-device-id'] = device_info['id'] - def _call_api(self, path, video_id, note=None, headers=None, query=None, data=None): - return self._download_json( - f'https://www.plus.fifa.com/{path}', video_id, note, headers={ - **self._HEADERS, - **(headers or {}), - }, query=query, data=data) - def _extract_video(self, video_info, video_id): formats = [] subtitles = {} @@ -104,7 +104,7 @@ def _extract_video(self, video_info, video_id): class FifaPlayerIE(FifaBaseIE): - _VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/player/(?P[\w-]+)/?\?(?:[^#]+&)?catalogId=(?P[\w-]+)' + _VALID_URL = r'https?://(www\.)?plus\.fifa\.com/(?:\w{2})/player/(?P[\w-]+)/?\?(?:[^#]+&)?catalogId=(?P[\w-]+)' _TESTS = [{ 'url': 'https://www.plus.fifa.com/en/player/f67b9d46-38c3-4e38-bbf3-89cf14cbcc1a?catalogId=b9c32230-1426-46d0-8448-ca824ae48603&entryPoint=Slider', 'info_dict': { @@ -133,10 +133,10 @@ class FifaPlayerIE(FifaBaseIE): }] def _real_extract(self, url): - video_id, catelog_id = self._match_valid_url(url).group('id', 'display_id') + video_id, catalog_id = self._match_valid_url(url).group('id', 'catalog_id') video_asset = self._call_api( 'flux-capacitor/api/v1/videoasset', video_id, - 'Downloading video asset', query={'catalog': catelog_id}) + 'Downloading video asset', query={'catalog': catalog_id}) video_info = traverse_obj(video_asset, (lambda _, v: v['id'] == video_id), get_all=False) if not video_info: raise ExtractorError('Unable to extract video info') @@ -329,7 +329,6 @@ def _real_extract(self, url): video_ids = [] if hero_video_entry_id := page_info.get('heroVideoEntryId'): video_ids.append(hero_video_entry_id) - video_ids.extend(traverse_obj(page_info, ( 'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video', 'data', 'target', 'sys', 'id'))) From 971bf92c064b70e1ba01bfbcd7da79f3a0a18e6d Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:12:22 +0000 Subject: [PATCH 09/11] code style --- yt_dlp/extractor/fifa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index ebb9ade261..cbdc983498 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -67,8 +67,8 @@ def _extract_video(self, video_info, video_id): 'mpd/cenc+h264;q=0.9, mpd/clear+h264;q=0.7, mp4/;q=0.1', ]: session_info = self._call_api( - 'flux-capacitor/api/v1/streaming/session', video_id, 'Getting streaming session', - headers={'x-chili-accept-stream': stream_type}, + 'flux-capacitor/api/v1/streaming/session', video_id, + 'Getting streaming session', headers={'x-chili-accept-stream': stream_type}, data=json.dumps({'videoAssetId': video_info['id'], 'autoPlay': False}).encode()) streams_info = self._call_api( 'flux-capacitor/api/v1/streaming/urls', video_id, 'Getting streaming urls', From 98e034ff8c6462685705c8f0948b65b28c23f8ae Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:23:23 +0000 Subject: [PATCH 10/11] comment about DRM --- yt_dlp/extractor/fifa.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index cbdc983498..83b43d98e8 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -220,6 +220,7 @@ class FifaContentIE(FifaBaseIE): }] def _entries(self, video_asset, video_id): + # trailers are non-DRM'd for video_info in traverse_obj(video_asset, (lambda _, v: v['type'] == 'TRAILER', {dict})): yield self._extract_video(video_info, video_id) From d5df68ed8177f2d9f3136762a47eba7b56adbd25 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:02:59 +0000 Subject: [PATCH 11/11] new test, new style. --- yt_dlp/extractor/fifa.py | 92 +++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index 83b43d98e8..beacb9d8e7 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -11,6 +11,7 @@ strip_or_none, unified_timestamp, update_url_query, + url_or_none, ) from ..utils.traversal import traverse_obj @@ -90,16 +91,18 @@ def _extract_video(self, video_info, video_id): self._remove_duplicate_formats(formats) return { + **traverse_obj(video_info, { + 'title': ('title', {strip_or_none}), + 'duration': ('duration', {float_or_none(scale=1000)}), + 'age_limit': ('parental', 'age', {int_or_none}), + 'thumbnails': ( + ('posterUrl', 'wideCoverUrl'), {update_url_query(query={'width': 1408})}, + {lambda x: {'url': x, 'width': 1408}}, + ), + }), 'id': video_id, - 'title': strip_or_none(video_info['title']), - 'duration': float_or_none(video_info.get('duration'), scale=1000), 'formats': formats, 'subtitles': subtitles, - 'age_limit': traverse_obj(video_info, ('parental', 'age', {int_or_none})), - 'thumbnails': [{ - 'url': update_url_query(x, {'width': 1408}), - 'width': 1408, - } for x in [video_info.get('posterUrl'), video_info.get('wideCoverUrl')] if x], } @@ -258,16 +261,6 @@ class FifaArticleIE(InfoExtractor): 'upload_date': '20220428', }, 'playlist': [{ - 'info_dict': { - 'id': '6B2xtOT2SDMB4JeF3i9n2y', - 'ext': 'mp4', - 'title': 'Foord & Kerr: Friends and rivals', - 'description': 'md5:756e14e1814196948ec4d2a9663f7214', - 'duration': 82, - 'categories': ['News', 'Interview'], - 'thumbnail': r're:https://digitalhub\.fifa\.com/transform/[^/]+/\w+', - }, - }, { 'info_dict': { 'id': 'R2Y1vbwvggrlSr02Cfr99', 'ext': 'mp4', @@ -277,8 +270,32 @@ class FifaArticleIE(InfoExtractor): 'categories': ['News', 'Interview'], 'thumbnail': r're:https://digitalhub\.fifa\.com/transform/[^/]+/\w+', }, + }, { + 'info_dict': { + 'id': '6B2xtOT2SDMB4JeF3i9n2y', + 'ext': 'mp4', + 'title': 'Foord & Kerr: Friends and rivals', + 'description': 'md5:756e14e1814196948ec4d2a9663f7214', + 'duration': 82, + 'categories': ['News', 'Interview'], + 'thumbnail': r're:https://digitalhub\.fifa\.com/transform/[^/]+/\w+', + }, }], 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/en/articles/100-great-world-cup-moments-qatar-2022-11-pele-1958-sweden-youngest-scorer', + 'md5': '2df0f3303650c5f4ee21bec24500bad3', + 'info_dict': { + 'id': '100-great-world-cup-moments-qatar-2022-11-pele-1958-sweden-youngest-scorer', + 'ext': 'mp4', + 'title': 'O Rei arrives in style (11) | 100 great World Cup moments', + 'description': 'Watch some of the best goals ever scored in the FIFA World Cup™.', + 'duration': 30, + 'categories': ['FIFA Tournaments'], + 'thumbnail': r're:https://digitalhub\.fifa\.com/transform/[^/]+/\w+', + 'timestamp': 1667988000, + 'upload_date': '20221109', + }, }, { # https://www.fifa.com/en/articles/stars-set-to-collide-in-uwcl-final 'url': 'https://www.fifa.com/fifaplus/en/articles/stars-set-to-collide-in-uwcl-final', @@ -310,13 +327,15 @@ def _entries(self, video_ids, article_id): formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], article_id) yield { + **traverse_obj(video_details, { + 'title': 'title', + 'description': 'description', + 'duration': ('duration', {int_or_none}), + 'release_timestamp': ('dateOfRelease', {unified_timestamp}), + 'categories': (('videoCategory', 'videoSubcategory'), all, ..., {str}), + 'thumbnail': ('backgroundImage', 'src', {url_or_none}), + }), 'id': video_id, - 'title': video_details.get('title'), - 'description': video_details.get('description'), - 'duration': int_or_none(video_details.get('duration')), - 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')), - 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)), - 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')), 'formats': formats, 'subtitles': subtitles, } @@ -327,13 +346,26 @@ def _real_extract(self, url): page_id = self._call_api(f'pages/en/articles/{article_id}', article_id)['pageId'] page_info = self._call_api(f'sections/article/{page_id}', article_id, query={'locale': locale}) - video_ids = [] + video_ids = traverse_obj(page_info, ( + 'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video', + 'data', 'target', 'sys', 'id')) if hero_video_entry_id := page_info.get('heroVideoEntryId'): video_ids.append(hero_video_entry_id) - video_ids.extend(traverse_obj(page_info, ( - 'richtext', 'content', lambda _, v: v['data']['target']['contentTypesCheckboxValue'] == 'Video', - 'data', 'target', 'sys', 'id'))) - return self.playlist_result( - self._entries(video_ids, article_id), article_id, page_info.get('articleTitle'), - timestamp=parse_iso8601(page_info.get('articlePublishedDate'))) + entries = list(self._entries(video_ids, article_id)) + + common_info = { + **traverse_obj(page_info, { + 'title': 'articleTitle', + 'timestamp': ('articlePublishedDate', {parse_iso8601}), + }), + 'id': article_id, + } + + if len(entries) == 1: + return { + **entries[0], + **common_info, + } + + return self.playlist_result(entries, **common_info)