From 7395ddbee1f51060144c26c93c0bc9a33276a7af Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 23 Aug 2024 05:40:18 +0000 Subject: [PATCH 1/3] [ie/DrTalks] Add extractor --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/drtalks.py | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 yt_dlp/extractor/drtalks.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9b73fcd75e..000a3cabb7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -548,6 +548,7 @@ DropoutIE, DropoutSeasonIE, ) +from .drtalks import DrTalksIE from .drtuber import DrTuberIE from .drtv import ( DRTVIE, diff --git a/yt_dlp/extractor/drtalks.py b/yt_dlp/extractor/drtalks.py new file mode 100644 index 0000000000..45c9d2c094 --- /dev/null +++ b/yt_dlp/extractor/drtalks.py @@ -0,0 +1,56 @@ +import functools +import urllib.parse + +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor +from ..utils import extract_attributes + + +class DrTalksIE(InfoExtractor): + _VALID_URL = r'https?://event\.drtalks\.com/(?P<id>.+/[^/]+)/?' 
+ + _TESTS = [{ + 'url': 'https://event.drtalks.com/reversing-heart-disease-summit/free-access-day-1', + 'info_dict': { + 'id': '1758074870279626053', + 'title': 'Free Access Day 1 - Events at DrTalks', + 'thumbnail': 're:https://event.drtalks.com/wp-content/uploads/.+', + }, + 'playlist_mincount': 11, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://event.drtalks.com/bioenergetics-2022/free-access-day-1/', + 'info_dict': { + 'id': '1747611460188466596', + 'title': 'The BioEnergetics Summit', + 'thumbnail': 're:https://event.drtalks.com/wp-content/uploads/.+', + }, + 'playlist_mincount': 8, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://event.drtalks.com/mitochondrial-summit/encore-access-day-6', + 'only_matching': True, + }, { + 'url': 'https://event.drtalks.com/medicine-of-mindset-summit/free-access-day-1/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_attrs = extract_attributes(self._search_regex(r'(<video-js[^>]+>)', webpage, 'player')) + + playlist_url = functools.reduce(urllib.parse.urljoin, [ + 'https://players.brightcove.net/', + f'{player_attrs["data-account"]}/', + f'{player_attrs["data-player"]}_{player_attrs["data-embed"]}/', + f'index.html?playlistId={player_attrs["data-playlist-id"]}', + ]) + + return self.url_result( + playlist_url, BrightcoveNewIE.ie_key(), video_id, self._og_search_title(webpage), + url_transparent=True, thumbnail=self._og_search_thumbnail(webpage)) From ef9acc949e1fdc8f0d49a5d8717c475f769b7347 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:17:33 +0000 Subject: [PATCH 2/3] BRIGHTCOVE_URL_TEMPLATE --- yt_dlp/extractor/drtalks.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/drtalks.py b/yt_dlp/extractor/drtalks.py index 45c9d2c094..af327edd74 100644 --- 
a/yt_dlp/extractor/drtalks.py +++ b/yt_dlp/extractor/drtalks.py @@ -1,9 +1,7 @@ -import functools -import urllib.parse from .brightcove import BrightcoveNewIE from .common import InfoExtractor -from ..utils import extract_attributes +from ..utils import extract_attributes, smuggle_url class DrTalksIE(InfoExtractor): @@ -14,7 +12,7 @@ class DrTalksIE(InfoExtractor): 'info_dict': { 'id': '1758074870279626053', 'title': 'Free Access Day 1 - Events at DrTalks', - 'thumbnail': 're:https://event.drtalks.com/wp-content/uploads/.+', + 'thumbnail': r're:https://event.drtalks.com/wp-content/uploads/.+', }, 'playlist_mincount': 11, 'params': { @@ -25,7 +23,7 @@ class DrTalksIE(InfoExtractor): 'info_dict': { 'id': '1747611460188466596', 'title': 'The BioEnergetics Summit', - 'thumbnail': 're:https://event.drtalks.com/wp-content/uploads/.+', + 'thumbnail': r're:https://event.drtalks.com/wp-content/uploads/.+', }, 'playlist_mincount': 8, 'params': { @@ -38,19 +36,19 @@ class DrTalksIE(InfoExtractor): 'url': 'https://event.drtalks.com/medicine-of-mindset-summit/free-access-day-1/', 'only_matching': True, }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?playlistId=%s' def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - player_attrs = extract_attributes(self._search_regex(r'(<video-js[^>]+>)', webpage, 'player')) - - playlist_url = functools.reduce(urllib.parse.urljoin, [ - 'https://players.brightcove.net/', - f'{player_attrs["data-account"]}/', - f'{player_attrs["data-player"]}_{player_attrs["data-embed"]}/', - f'index.html?playlistId={player_attrs["data-playlist-id"]}', - ]) + player_attrs = extract_attributes(self._search_regex( + r'(<video-js[^>]+\bid=(["\'])myPlayerID\2[^>]*>)', webpage, 'player')) + bc_url = smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % ( + player_attrs.get('data-account', '6314452011001'), + player_attrs.get('data-player', 'f3rfrCUjm'), + player_attrs.get('data-embed', 'default'), + 
player_attrs['data-playlist-id']), {'source_url': url}) return self.url_result( - playlist_url, BrightcoveNewIE.ie_key(), video_id, self._og_search_title(webpage), + bc_url, BrightcoveNewIE.ie_key(), video_id, self._og_search_title(webpage), url_transparent=True, thumbnail=self._og_search_thumbnail(webpage)) From 25fbd01c3ccbf52f32846c534c89ad2cc8e7cb0c Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:43:55 +0000 Subject: [PATCH 3/3] no leading empty line --- yt_dlp/extractor/drtalks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/drtalks.py b/yt_dlp/extractor/drtalks.py index af327edd74..2d96fb4c15 100644 --- a/yt_dlp/extractor/drtalks.py +++ b/yt_dlp/extractor/drtalks.py @@ -1,4 +1,3 @@ - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..utils import extract_attributes, smuggle_url