From 960b8931c6bedffc186ddbc576f62089f0d65234 Mon Sep 17 00:00:00 2001 From: lonm Date: Wed, 15 May 2024 16:39:56 +0100 Subject: [PATCH 01/16] Fix podcast and person playlist downloads --- yt_dlp/extractor/radiofrance.py | 104 +++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 6bd6fe9b68..3ada312ba5 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -267,10 +267,10 @@ def _real_extract(self, url): class RadioFrancePlaylistBaseIE(RadioFranceBaseIE): """Subclasses must set _METADATA_KEY""" - def _call_api(self, content_id, cursor, page_num): + def _call_api(self, station, content_id, cursor): raise NotImplementedError('This method must be implemented by subclasses') - def _generate_playlist_entries(self, content_id, content_response): + def _generate_playlist_entries(self, station, content_id, content_response): for page_num in itertools.count(2): for entry in content_response['items']: yield self.url_result( @@ -281,28 +281,39 @@ def _generate_playlist_entries(self, content_id, content_response): 'thumbnail': ('visual', 'src'), })) - next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False) - if not next_cursor: + if not content_response["next"]: break - content_response = self._call_api(content_id, next_cursor, page_num) + content_response = self._call_api(station, content_id, content_response["next"]) def _real_extract(self, url): - display_id = self._match_id(url) + playlist_id = self._match_id(url) + # If it is a podcast playlist, get the name of the station it is on + # profile page playlists are not attached to a station currently + station = self._match_valid_url(url).group('station') if isinstance(self, RadioFrancePodcastIE) else None - metadata = self._download_json( - 'https://www.radiofrance.fr/api/v2.1/path', display_id, - query={'value': 
urllib.parse.urlparse(url).path})['content'] + # Check if user started request from a page other than 1 + startpage = 1 + parsedurl = urllib.parse.urlparse(url) + if parsedurl.query: + startpagequery = urllib.parse.parse_qs(parsedurl.query) + if 'p' in startpagequery: + startpage = int(startpagequery['p'][0]) - content_id = metadata['id'] + # Get data for the first page, and the uuid for the playlist + metadata = self._call_api(station, playlist_id, startpage) + uuid = traverse_obj(metadata, ('metadata', 'id')) + # This method should return the final playlist metadata which yt-dlp can then use to download everything return self.playlist_result( - self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id, - display_id=display_id, **{**traverse_obj(metadata, { + self._generate_playlist_entries(station, playlist_id, metadata), + uuid, + display_id=playlist_id, + **{**traverse_obj(metadata['metadata'], { 'title': 'title', 'description': 'standFirst', 'thumbnail': ('visual', 'src'), - }), **traverse_obj(metadata, { + }), **traverse_obj(metadata['metadata'], { 'title': 'name', 'description': 'role', })}) @@ -311,7 +322,7 @@ def _real_extract(self, url): class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE): _VALID_URL = rf'''(?x) {RadioFranceBaseIE._VALID_URL_BASE} - /(?:{RadioFranceBaseIE._STATIONS_RE}) + /(?P<station>{RadioFranceBaseIE._STATIONS_RE}) /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$) ''' @@ -363,10 +374,27 @@ class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE): _METADATA_KEY = 'expressions' - def _call_api(self, podcast_id, cursor, page_num): - return self._download_json( - f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id, - note=f'Downloading page {page_num}', query={'pageCursor': cursor}) + def _call_api(self, station, podcast_id, cursor): + # The data is stored in the last