diff --git a/pyproject.toml b/pyproject.toml index 01162b794..a2442a14d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ dev = [ ] static-analysis = [ "autopep8~=2.0", - "ruff~=0.4.4", + "ruff~=0.5.0", ] test = [ "pytest~=8.1", @@ -211,6 +211,7 @@ ignore = [ "TD002", # missing-todo-author "TD003", # missing-todo-link "PLE0604", # invalid-all-object (false positives) + "PLE0643", # potential-index-error (false positives) "PLW0603", # global-statement "PLW1510", # subprocess-run-without-check "PLW2901", # redefined-loop-name diff --git a/test/test_download.py b/test/test_download.py index 882d54565..3f36869d9 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -20,7 +20,6 @@ gettestcases, getwebpagetestcases, is_download_test, - report_warning, try_rm, ) @@ -178,8 +177,7 @@ def try_rm_tcs_files(tcs=None): raise if try_num == RETRIES: - report_warning(f'{tname} failed due to network errors, skipping...') - return + raise print(f'Retrying: {try_num} failed tries\n\n##########\n\n') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index bb31fbb88..ba46da6b0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -76,6 +76,7 @@ ) from .aeonco import AeonCoIE from .afreecatv import ( + AfreecaTVCatchStoryIE, AfreecaTVIE, AfreecaTVLiveIE, AfreecaTVUserIE, diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index bcfb02cb9..f51b5a68b 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -72,7 +72,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): )\?.*?\bnTitleNo=| vod\.afreecatv\.com/(PLAYER/STATION|player)/ ) - (?P\d+) + (?P\d+)/?(?:$|[?#&]) ''' _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', @@ -189,7 +189,7 @@ def _real_extract(self, url): headers={'Referer': url}, data=urlencode_postdata({ 'nTitleNo': video_id, 'nApiLevel': 10, - }))['data'] + }), impersonate=True)['data'] error_code = traverse_obj(data, ('code', {int})) if error_code == -6221: @@ -253,6 +253,43 @@ def _real_extract(self, url): return self.playlist_result(entries, video_id, multi_video=True, **common_info) +class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): + IE_NAME = 'afreecatv:catchstory' + IE_DESC = 'afreecatv.com catch story' + _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P\d+)/catchstory' + _TESTS = [{ + 'url': 'https://vod.afreecatv.com/player/103247/catchstory', + 'info_dict': { + 'id': '103247', + }, + 'playlist_count': 2, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + 'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url}, + query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True) + + return self.playlist_result(self._entries(data), video_id) + + @staticmethod + def _entries(data): + # 'files' is always a list with 1 element + yield from traverse_obj(data, ( + 'data', lambda _, v: v['story_type'] == 'catch', + 'catch_list', lambda _, v: v['files'][0]['file'], { + 'id': ('files', 0, 'file_info_key', {str}), + 'url': ('files', 0, 'file', {url_or_none}), + 'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}), + 'title': ('title', {str}), + 'uploader': ('writer_nick', {str}), + 'uploader_id': ('writer_id', {str}), + 'thumbnail': ('thumb', {url_or_none}), + 'timestamp': ('write_timestamp', {int_or_none}), + })) + + class AfreecaTVLiveIE(AfreecaTVBaseIE): IE_NAME = 'afreecatv:live' IE_DESC = 'afreecatv.com livestreams' diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 7c8139714..0fe95bec5 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor): ] _API_BASE = 'https://api.atresplayer.com/' - def _handle_error(self, e, code): - if isinstance(e.cause, HTTPError) and e.cause.status == code: - error = self._parse_json(e.cause.response.read(), None) - if error.get('error') == 'required_registered': - self.raise_login_required() - raise ExtractorError(error['error_description'], expected=True) - raise - def _perform_login(self, username, password): self._request_webpage( self._API_BASE + 'login', None, 'Downloading login page') @@ -55,7 +47,9 @@ def _perform_login(self, username, password): 'password': password, }))['targetUrl'] except ExtractorError as e: - self._handle_error(e, 400) + if isinstance(e.cause, HTTPError) and e.cause.status == 400: + raise ExtractorError('Invalid username and/or password', expected=True) + raise self._request_webpage(target_url, None, 'Following Target URL') @@ -66,7 +60,12 @@ def _real_extract(self, url): episode = self._download_json( self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) except ExtractorError as e: - self._handle_error(e, 403) + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + error = self._parse_json(e.cause.response.read(), None) + if error.get('error') == 'required_registered': + self.raise_login_required() + raise ExtractorError(error['error_description'], expected=True) + raise title = episode['titulo'] diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index c74f34c2a..c83222ea5 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -24,7 +24,7 @@ class BitChuteIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P[^/?#&]+)' _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P{_VALID_URL})'] _TESTS = [{ 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', @@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor): }, { 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', 'only_matching': True, + }, { + 'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/', + 'only_matching': True, }] _GEO_BYPASS = False @@ -132,7 +135,7 @@ def _make_url(html): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) + f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) self._raise_if_restricted(webpage) publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) @@ -171,13 +174,13 @@ def _real_extract(self, url): class BitChuteChannelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?Pchannel|playlist)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?Pchannel|playlist)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bitchute.com/channel/bitchute/', 'info_dict': { 'id': 'bitchute', 'title': 'BitChute', - 'description': 'md5:5329fb3866125afa9446835594a9b138', + 'description': 'md5:2134c37d64fc3a4846787c402956adac', }, 'playlist': [ { @@ -210,6 +213,9 @@ class BitChuteChannelIE(InfoExtractor): 'title': 'Bruce MacDonald and "The Light of Darkness"', 'description': 'md5:747724ef404eebdfc04277714f81863e', }, + }, { + 'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/', + 'only_matching': True, }] _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' @@ -230,7 +236,7 @@ class BitChuteChannelIE(InfoExtractor): @staticmethod def _make_url(playlist_id, playlist_type): - return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/' + return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/' def _fetch_page(self, playlist_id, playlist_type, page_num): playlist_url = self._make_url(playlist_id, playlist_type) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 740e12926..1522b08e2 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -455,10 +455,8 @@ def _get_claims_token_expiry(self): def claims_token_expired(self): exp = self._get_claims_token_expiry() - if exp - time.time() < 10: - # It will expire in less than 10 seconds, or has already expired - return True - return False + # It will expire in less than 10 seconds, or has already expired + return exp - time.time() < 10 def claims_token_valid(self): return self._claims_token is not None and not self.claims_token_expired() diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 594ce2d0b..8b4d5c0fc 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,16 +1,16 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, - parse_resolution, - traverse_obj, try_get, + url_or_none, urlencode_postdata, ) +from ..utils.traversal import traverse_obj class DigitalConcertHallIE(InfoExtractor): IE_DESC = 'DigitalConcertHall extractor' - _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P[a-z]+)/(?Pfilm|concert)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P[a-z]+)/(?Pfilm|concert|work)/(?P[0-9]+)-?(?P[0-9]+)?' _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token' _ACCESS_TOKEN = None _NETRC_MACHINE = 'digitalconcerthall' @@ -26,7 +26,8 @@ class DigitalConcertHallIE(InfoExtractor): 'upload_date': '20210624', 'timestamp': 1624548600, 'duration': 2798, - 'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler', + 'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'], + 'composers': ['Kurt Weill'], }, 'params': {'skip_download': 'm3u8'}, }, { @@ -34,8 +35,9 @@ class DigitalConcertHallIE(InfoExtractor): 'url': 'https://www.digitalconcerthall.com/en/concert/53785', 'info_dict': { 'id': '53785', - 'album_artist': 'Berliner Philharmoniker / Kirill Petrenko', + 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'], 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich', + 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', }, 'params': {'skip_download': 'm3u8'}, 'playlist_count': 3, @@ -49,9 +51,20 @@ class DigitalConcertHallIE(InfoExtractor): 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', 'upload_date': '20220714', 'timestamp': 1657785600, - 'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff', + 'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'], }, 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'Concert with several works and an interview', + 'url': 'https://www.digitalconcerthall.com/en/work/53785-1', + 'info_dict': { + 'id': '53785', + 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'], + 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich', + 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + 'playlist_count': 1, }] def _perform_login(self, username, password): @@ -97,15 +110,14 @@ def _entries(self, items, language, type_, **kwargs): 'Accept-Language': language, }) - m3u8_url = traverse_obj( - stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False) - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False) + formats = [] + for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False)) yield { 'id': video_id, 'title': item.get('title'), 'composer': item.get('name_composer'), - 'url': m3u8_url, 'formats': formats, 'duration': item.get('duration_total'), 'timestamp': traverse_obj(item, ('date', 'published')), @@ -119,31 +131,32 @@ def _entries(self, items, language, type_, **kwargs): } def _real_extract(self, url): - language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id') + language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part') if not language: language = 'en' - thumbnail_url = self._html_search_regex( - r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)', - self._download_webpage(url, video_id), 'thumbnail') - thumbnails = [{ - 'url': thumbnail_url, - **parse_resolution(thumbnail_url), - }] - + api_type = 'concert' if type_ == 'work' else type_ vid_info = self._download_json( - f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={ + f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={ 'Accept': 'application/json', 'Accept-Language': language, }) - album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '') + album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) + if type_ == 'work': + videos = [videos[int(part) - 1]] + + thumbnail = traverse_obj(vid_info, ( + 'image', ..., {self._proto_relative_url}, {url_or_none}, + {lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size + return { '_type': 'playlist', 'id': video_id, 'title': vid_info.get('title'), - 'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_), - 'thumbnails': thumbnails, - 'album_artist': album_artist, + 'entries': self._entries( + videos, language, type_, thumbnail=thumbnail, album_artists=album_artists), + 'thumbnail': thumbnail, + 'album_artists': album_artists, } diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py index 5898e1f49..30d98ba79 100644 --- a/yt_dlp/extractor/jiocinema.py +++ b/yt_dlp/extractor/jiocinema.py @@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE): 'title': 'naagin', }, 'playlist_mincount': 120, + }, { + 'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820', + 'info_dict': { + 'id': '3499820', + 'title': 'mtv-splitsvilla-x5', + }, + 'playlist_mincount': 310, }] def _entries(self, series_id): - seasons = self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id, - 'Downloading series metadata JSON', query={ - 'sort': 'season:asc', - 'id': series_id, - 'responseType': 'common', - }) + seasons = traverse_obj(self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id, + 'Downloading series metadata JSON', query={'responseType': 'common'}), ( + 'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter', + 'trayTabs', lambda _, v: v['id'])) - for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1): + for season_num, season in enumerate(seasons, start=1): season_id = season['id'] - label = season.get('season') or season_num + label = season.get('label') or season_num for page_num in itertools.count(1): episodes = traverse_obj(self._download_json( f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index 8a693dc0b..6f67602a6 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -9,9 +9,10 @@ join_nonempty, parse_duration, parse_iso8601, - traverse_obj, try_get, + url_or_none, ) +from ..utils.traversal import traverse_obj class MLBBaseIE(InfoExtractor): @@ -326,15 +327,20 @@ def _real_extract(self, url): video_id)['data']['Airings'] formats, subtitles = [], {} - for airing in airings: - m3u8_url = self._download_json( + for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']): + format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing) + m3u8_url = traverse_obj(self._download_json( airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id, - headers={ + note=f'Downloading {format_id} stream info JSON', + errnote=f'Failed to download {format_id} stream info, skipping', + fatal=False, headers={ 'Authorization': self._access_token, 'Accept': 'application/vnd.media-service+json; version=2', - })['stream']['complete'] + }), ('stream', 'complete', {url_or_none})) + if not m3u8_url: + continue f, s = self._extract_m3u8_formats_and_subtitles( - m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage'))) + m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) formats.extend(f) self._merge_subtitles(s, target=subtitles) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7aa84aa8b..094b1e9a3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -468,7 +468,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko', ] - _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'} + _IGNORED_WARNINGS = { + 'Unavailable videos will be hidden during playback', + 'Unavailable videos are hidden', + } _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 5c82de19e..a0f32892f 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -667,12 +667,12 @@ def eval_method(): self.interpret_expression(v, local_vars, allow_recursion) for v in self._separate(arg_str)] - if obj == str: + if obj is str: if member == 'fromCharCode': assertion(argvals, 'takes one or more arguments') return ''.join(map(chr, argvals)) raise self.Exception(f'Unsupported String method {member}', expr) - elif obj == float: + elif obj is float: if member == 'pow': assertion(len(argvals) == 2, 'takes two arguments') return argvals[0] ** argvals[1] diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index c69c54b3a..86850c185 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -230,9 +230,7 @@ class Urllib3LoggingFilter(logging.Filter): def filter(self, record): # Ignore HTTP request messages since HTTPConnection prints those - if record.msg == '%s://%s:%s "%s %s %s" %s %s': - return False - return True + return record.msg != '%s://%s:%s "%s %s %s" %s %s' class Urllib3LoggingHandler(logging.Handler):