From 1597bcf6604fe377c870aab63002e3ad42610ac1 Mon Sep 17 00:00:00 2001
From: "lauren n. liberda"
Date: Sat, 16 Mar 2024 00:20:38 +0100
Subject: [PATCH 1/2] [extractor/polskieradio] fix live player

---
Reviewer notes (below the three-dash line, so not part of the commit message):
- The stream selection in _real_extract was amended during review: the
  submitted `next(...)` call had no default and leaked a bare StopIteration
  when a station listed no `.m3u8` stream; it now raises ExtractorError.
- The last hunk header and the diffstat were updated for that amendment; the
  blob ids on the `index` lines are still those of the original submission.

 yt_dlp/extractor/polskieradio.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py
index e0b22fffdf..4ec7dba541 100644
--- a/yt_dlp/extractor/polskieradio.py
+++ b/yt_dlp/extractor/polskieradio.py
@@ -459,7 +459,10 @@ class PolskieRadioPlayerIE(InfoExtractor):
         'info_dict': {
             'id': '3',
             'ext': 'm4a',
-            'title': 'Trójka',
+            'title': r're:Trójka \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+            'thumbnail': 'https://player.polskieradio.pl/images/trojka-color-logo.png',
+            'live_status': 'is_live',
+            'display_id': 'trojka',
         },
         'params': {
             'format': 'bestaudio',
@@ -471,9 +474,10 @@ def _get_channel_list(self, channel_url='no_channel'):
         player_code = self._download_webpage(
             self._PLAYER_URL, channel_url,
             note='Downloading js player')
-        channel_list = js_to_json(self._search_regex(
-            r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list'))
-        return self._parse_json(channel_list, channel_url)
+        return self._search_json(
+            r''';\s*var\s[a-zA-Z_]+\s*=\s*["']anteny["']\s*,\s*[a-zA-Z_]+\s*=\s*''',
+            player_code, 'channel list', channel_url, transform_source=js_to_json,
+            contains_pattern=r'\[{(?s:.+)}\]')
 
     def _real_extract(self, url):
         channel_url = self._match_id(url)
@@ -496,19 +500,15 @@ def _real_extract(self, url):
         if not station:
             raise ExtractorError('Station not found even though we extracted channel')
 
-        formats = []
-        for stream_url in station['Streams']:
-            stream_url = self._proto_relative_url(stream_url)
-            if stream_url.endswith('/playlist.m3u8'):
-                formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True))
-            elif stream_url.endswith('/manifest.f4m'):
-                formats.extend(self._extract_mpd_formats(stream_url, channel_url))
-            elif stream_url.endswith('/Manifest'):
-                formats.extend(self._extract_ism_formats(stream_url, channel_url))
-            else:
-                formats.append({
-                    'url': stream_url,
-                })
+        # Only the first HLS (.m3u8) stream is used; raise a proper extractor
+        # error instead of leaking StopIteration when the station lists none
+        stream_url = next((
+            candidate for candidate in station.get('Streams') or []
+            if candidate.endswith('.m3u8')), None)
+        if not stream_url:
+            raise ExtractorError('No HLS stream found for station')
+        formats = self._extract_m3u8_formats(
+            stream_url.replace('http:', 'https:'), channel_url, live=True)
 
         return {
             'id': compat_str(channel['id']),

From 1a68959dc4e2dfb9071429c07598b14a34aae576 Mon Sep 17 00:00:00 2001
From: "lauren n. liberda"
Date: Sat, 16 Mar 2024 00:53:22 +0100
Subject: [PATCH 2/2] [extractor/polskieradio] cache the channel list

---
Reviewer notes (below the three-dash line, so not part of the commit message):
- NOTE(review): the page fetch and the 'player hash' search added here are
  called with default arguments; if those helpers are fatal by default, a
  failure to derive the cache key aborts extraction even though the uncached
  path below could still succeed. Confirm whether that is intended.

 yt_dlp/extractor/polskieradio.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py
index 4ec7dba541..c63b5a81c7 100644
--- a/yt_dlp/extractor/polskieradio.py
+++ b/yt_dlp/extractor/polskieradio.py
@@ -471,13 +471,20 @@ class PolskieRadioPlayerIE(InfoExtractor):
     }]
 
     def _get_channel_list(self, channel_url='no_channel'):
+        webpage = self._download_webpage(self._BASE_URL, channel_url)
+        player_hash = self._search_regex(r'/main\.bundle\.js\?([a-f0-9]+)', webpage, 'player hash')
+        channel_list = self.cache.load('polskieradio-player-channel-list', player_hash)
+        if channel_list:
+            return channel_list
         player_code = self._download_webpage(
             self._PLAYER_URL, channel_url,
             note='Downloading js player')
-        return self._search_json(
+        channel_list = self._search_json(
             r''';\s*var\s[a-zA-Z_]+\s*=\s*["']anteny["']\s*,\s*[a-zA-Z_]+\s*=\s*''',
             player_code, 'channel list', channel_url, transform_source=js_to_json,
             contains_pattern=r'\[{(?s:.+)}\]')
+        self.cache.store('polskieradio-player-channel-list', player_hash, channel_list)
+        return channel_list
 
     def _real_extract(self, url):
         channel_url = self._match_id(url)