From bc344cd456380999c1ee74554dfd432a38f32ec7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 6 Jul 2023 18:39:50 +0530 Subject: [PATCH] [core] Allow extractors to mark formats as potentially DRM (#7396) This is useful for HLS where detecting whether the format is actually DRM requires the child manifest to be downloaded. Makes the error message when using `--test` inconsistent, but doesn't really matter. --- yt_dlp/YoutubeDL.py | 37 ++++++++++++++++++++++--------------- yt_dlp/downloader/hls.py | 30 ++++++++++++++++++++---------- yt_dlp/extractor/common.py | 10 ++++------ 3 files changed, 46 insertions(+), 31 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c5014f870..cf0122d4ba 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -983,6 +983,7 @@ def trouble(self, message=None, tb=None, is_error=True): ID='green', DELIM='blue', ERROR='red', + BAD_FORMAT='light red', WARNING='yellow', SUPPRESS='light black', ) @@ -2085,8 +2086,6 @@ def syntax_error(note, start): allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), 'video': self.params.get('allow_multiple_video_streams', False)} - check_formats = self.params.get('check_formats') == 'selected' - def _parse_filter(tokens): filter_parts = [] for type, string_, start, _, _ in tokens: @@ -2259,10 +2258,19 @@ def _merge(formats_pair): return new_dict def _check_formats(formats): - if not check_formats: + if (self.params.get('check_formats') is not None + or self.params.get('allow_unplayable_formats')): yield from formats return - yield from self._check_formats(formats) + elif self.params.get('check_formats') == 'selected': + yield from self._check_formats(formats) + return + + for f in formats: + if f.get('has_drm'): + yield from self._check_formats([f]) + else: + yield f def _build_selector_function(selector): if isinstance(selector, list): # , @@ -2614,10 +2622,10 @@ def sanitize_numeric_fields(info): if field_preference: info_dict['_format_sort_fields'] = field_preference - # or None ensures --clean-infojson removes it - info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None + info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it + f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None if not self.params.get('allow_unplayable_formats'): - formats = [f for f in formats if not f.get('has_drm')] + formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe'] if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats): self.report_warning( @@ -3719,14 +3727,13 @@ def simplified_codec(f, field): simplified_codec(f, 'acodec'), format_field(f, 'abr', '\t%dk', func=round), format_field(f, 'asr', '\t%s', func=format_decimal_suffix), - join_nonempty( - self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, - self._format_out('DRM', 'light red') if f.get('has_drm') else None, - format_field(f, 'language', '[%s]'), - join_nonempty(format_field(f, 'format_note'), - format_field(f, 'container', ignore=(None, f.get('ext'))), - delim=', '), - delim=' '), + join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty( + self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, + (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' + else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), delim=' '), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] header_line = self._list_format_headers( 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO', diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index f2868dc52b..ab7d496d42 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -28,7 +28,16 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest, info_dict, allow_unplayable_formats=False): + def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039 + return bool(re.search('|'.join(( + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay + r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + )), manifest)) + + @classmethod + def can_download(cls, manifest, info_dict, allow_unplayable_formats=False): UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -50,13 +59,15 @@ def can_download(manifest, info_dict, allow_unplayable_formats=False): ] if not allow_unplayable_formats: UNSUPPORTED_FEATURES += [ - r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM ] def check_results(): yield not info_dict.get('is_live') for feature in UNSUPPORTED_FEATURES: yield not re.search(feature, manifest) + if not allow_unplayable_formats: + yield not cls._has_drm(manifest) return all(check_results()) def real_download(self, filename, info_dict): @@ -81,14 +92,13 @@ def real_download(self, filename, info_dict): message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, ' f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') if not can_download: - has_drm = re.search('|'.join([ - r'#EXT-X-FAXS-CM:', # Adobe Flash Access - r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay - ]), s) - if has_drm and not self.params.get('allow_unplayable_formats'): - self.report_error( - 'This video is DRM protected; Try selecting another format with --format or ' - 'add --check-formats to automatically fallback to the next best format') + if self._has_drm(s) and not self.params.get('allow_unplayable_formats'): + if info_dict.get('has_drm') and self.params.get('test'): + self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True) + else: + self.report_error( + 'This format is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format', tb=False) return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3f7dcb82bb..fe08839aaa 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -26,6 +26,7 @@ from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name from ..cookies import LenientSimpleCookie from ..downloader.f4m import get_base_url, remove_encrypted_media +from ..downloader.hls import HlsFD from ..utils import ( IDENTITY, JSON_LD_RE, @@ -224,7 +225,8 @@ class InfoExtractor: width : height ratio as float. * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. - * has_drm The format has DRM and cannot be downloaded. Boolean + * has_drm True if the format has DRM and cannot be downloaded. + 'maybe' if the format may have DRM and has to be tested before download. * extra_param_to_segment_url A query string to append to each fragment's URL, or to update each existing query string with. Only applied by the native HLS/DASH downloaders. @@ -1979,11 +1981,7 @@ def _parse_m3u8_formats_and_subtitles( errnote=None, fatal=True, data=None, headers={}, query={}, video_id=None): formats, subtitles = [], {} - - has_drm = re.search('|'.join([ - r'#EXT-X-FAXS-CM:', # Adobe Flash Access - r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay - ]), m3u8_doc) + has_drm = HlsFD._has_drm(m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)