From feb1ae571589e0c8514d14c4e97bbea8da0eb194 Mon Sep 17 00:00:00 2001 From: Git User Date: Mon, 26 Feb 2024 14:59:15 -0500 Subject: [PATCH 1/2] Add support for downloading a show from Dropout, with all seasons --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/dropout.py | 61 +++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 583477b98..f505cd22e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -533,6 +533,7 @@ from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE from .dropout import ( + DropoutShowIE, DropoutSeasonIE, DropoutIE ) diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 80ae6c126..fbda59474 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -1,4 +1,5 @@ import functools +import re from .common import InfoExtractor from .vimeo import VHXEmbedIE @@ -167,7 +168,8 @@ def _real_extract(self, url): class DropoutSeasonIE(InfoExtractor): _PAGE_SIZE = 24 - _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P[^\/$&?#]+)(?:/season:(?P[0-9]+))/?$' + _TESTS = [ { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1', @@ -178,24 +180,6 @@ class DropoutSeasonIE(InfoExtractor): 'title': 'Dimension 20 Fantasy High - Season 1' } }, - { - 'url': 'https://www.dropout.tv/dimension-20-fantasy-high', - 'note': 'Multi-season series with the season not in the url', - 'playlist_count': 24, - 'info_dict': { - 'id': 'dimension-20-fantasy-high-season-1', - 'title': 'Dimension 20 Fantasy High - Season 1' - } - }, - { - 'url': 'https://www.dropout.tv/dimension-20-shriek-week', - 'note': 'Single-season series', - 'playlist_count': 4, - 'info_dict': { - 'id': 'dimension-20-shriek-week-season-1', - 'title': 'Dimension 20 Shriek Week - Season 1' - } - }, { 'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3', 'note': 'Multi-season series with season in the url that requires pagination', @@ -222,3 +206,42 @@ def _real_extract(self, url): return self.playlist_result( OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}') + +class DropoutShowIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P[^\/$&?#]+)/?$' + _TEST = { + 'url': 'https://www.dropout.tv/dirty-laundry/', + 'info_dict': { + 'id': 'dirty-laundry', + 'title': 'Dirty Laundry', + }, + 'playlist_mincount': 3, + } + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + show_title = self._html_search_regex( + r']*>(.+?)', webpage, 'show title', + default=None) or show_id.replace('-', ' ').title() + + season_urls = re.findall(r'