From 88acdbc2698169e22cdbf358e44765150434c69e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 23 Aug 2021 01:38:38 +0530 Subject: [PATCH] [extractor] Better error message for DRM (#729) Closes #636 --- README.md | 7 ++----- yt_dlp/YoutubeDL.py | 32 +++++++++++++++++++++++------- yt_dlp/extractor/brightcove.py | 2 +- yt_dlp/extractor/ceskatelevize.py | 6 +++--- yt_dlp/extractor/common.py | 25 ++++++++++------------- yt_dlp/extractor/corus.py | 2 +- yt_dlp/extractor/crackle.py | 4 ++-- yt_dlp/extractor/globo.py | 2 +- yt_dlp/extractor/hotstar.py | 3 ++- yt_dlp/extractor/ninecninemedia.py | 3 +-- yt_dlp/extractor/ninenow.py | 7 ++++--- yt_dlp/extractor/npo.py | 3 +-- yt_dlp/extractor/prosiebensat1.py | 2 +- yt_dlp/extractor/rai.py | 2 +- yt_dlp/extractor/ruutu.py | 2 +- yt_dlp/extractor/shahid.py | 2 +- yt_dlp/extractor/sonyliv.py | 2 +- yt_dlp/extractor/toggle.py | 3 +-- yt_dlp/extractor/tv2.py | 2 +- yt_dlp/extractor/vidio.py | 3 +-- yt_dlp/extractor/wakanim.py | 3 +-- yt_dlp/extractor/wat.py | 2 +- yt_dlp/extractor/youtube.py | 3 +-- yt_dlp/options.py | 6 ++---- 24 files changed, 66 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index bd7d31c13f..20be114686 100644 --- a/README.md +++ b/README.md @@ -668,11 +668,6 @@ ## Video Format Options: bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no merge is required - --allow-unplayable-formats Allow unplayable formats to be listed and - downloaded. All video post-processing will - also be turned off - --no-allow-unplayable-formats Do not allow unplayable formats to be - listed or downloaded (default) ## Subtitle Options: --write-subs Write subtitle file @@ -1470,6 +1465,8 @@ #### Developer options --test Download only part of video for testing extractors --youtube-print-sig-code For testing youtube signatures + --allow-unplayable-formats List unplayable formats also + --no-allow-unplayable-formats Default #### Old aliases diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a422674f5e..a6e4c61778 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -497,6 +497,12 @@ def __init__(self, params=None, auto_init=True): self.report_warning( 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2]) + if self.params.get('allow_unplayable_formats'): + self.report_warning( + 'You have asked for unplayable formats to be listed/downloaded. ' + 'This is a developer option intended for debugging. ' + 'If you experience any issues while using this option, DO NOT open a bug report') + def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion)) @@ -827,6 +833,14 @@ def report_file_delete(self, file_name): except UnicodeEncodeError: self.to_screen('Deleting existing file') + def raise_no_formats(self, has_drm=False, forced=False): + msg = 'This video is DRM protected' if has_drm else 'No video formats found!' + expected = self.params.get('ignore_no_formats_error') + if forced or not expected: + raise ExtractorError(msg, expected=has_drm or expected) + else: + self.report_warning(msg) + def parse_outtmpl(self): outtmpl_dict = self.params.get('outtmpl', {}) if not isinstance(outtmpl_dict, dict): @@ -2151,11 +2165,12 @@ def sanitize_numeric_fields(info): else: formats = info_dict['formats'] + if not self.params.get('allow_unplayable_formats'): + formats = [f for f in formats if not f.get('has_drm')] + info_dict['__has_drm'] = len(info_dict.get('formats') or ['']) > len(formats) + if not formats: - if not self.params.get('ignore_no_formats_error'): - raise ExtractorError('No video formats found!') - else: - self.report_warning('No video formats found!') + self.raise_no_formats(info_dict.get('__has_drm')) def is_wellformed(f): url = f.get('url') @@ -2219,7 +2234,7 @@ def is_wellformed(f): # TODO Central sorting goes here - if formats and formats[0] is not info_dict: + if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, @@ -2232,8 +2247,9 @@ def is_wellformed(f): self.list_thumbnails(info_dict) if self.params.get('listformats'): if not info_dict.get('formats') and not info_dict.get('url'): - raise ExtractorError('No video formats found', expected=True) - self.list_formats(info_dict) + self.to_screen('%s has no formats' % info_dict['id']) + else: + self.list_formats(info_dict) if self.params.get('listsubtitles'): if 'automatic_captions' in info_dict: self.list_subtitles( @@ -2410,6 +2426,8 @@ def print_optional(field): self.to_stdout(json.dumps(self.sanitize_info(info_dict))) def dl(self, name, info, subtitle=False, test=False): + if not info.get('url'): + self.raise_no_formats(info.get('__has_drm'), forced=True) if test: verbose = self.params.get('verbose') diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index f3d955d6bf..bb68dc481f 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -549,7 +549,7 @@ def build_format_id(kind): error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) elif (not self.get_param('allow_unplayable_formats') and sources and num_drm_sources == len(sources)): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(video_id) self._sort_formats(formats) diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index b2ebfdadd7..5e04d38a25 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -147,9 +147,6 @@ def _real_extract(self, url): is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): - if (not self.get_param('allow_unplayable_formats') - and 'drmOnly=true' in stream_url): - continue if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', @@ -158,6 +155,9 @@ def _real_extract(self, url): stream_formats = self._extract_mpd_formats( stream_url, playlist_id, mpd_id='dash-%s' % format_id, fatal=False) + if 'drmOnly=true' in stream_url: + for f in stream_formats: + f['has_drm'] = True # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031 if format_id == 'audioDescription': for f in stream_formats: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 734651193a..85ac4857ef 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -203,6 +203,7 @@ class InfoExtractor(object): width : height ratio as float. * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. + * has_drm The format has DRM and cannot be downloaded. Boolean * downloader_options A dictionary of downloader options as described in FileDownloader RTMP formats can also have the additional fields: page_url, @@ -1024,6 +1025,9 @@ def get_param(self, name, default=None, *args, **kwargs): return self._downloader.params.get(name, default, *args, **kwargs) return default + def report_drm(self, video_id, partial=False): + self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id) + def report_extraction(self, id_or_name): """Report information extraction.""" self.to_screen('%s: Extracting information' % id_or_name) @@ -1752,9 +1756,7 @@ def calculate_preference(self, format): def _sort_formats(self, formats, field_preference=[]): if not formats: - if self.get_param('ignore_no_formats_error'): - return - raise ExtractorError('No video formats found') + return format_sort = self.FormatSort() # params and to_screen are taken from the downloader format_sort.evaluate_params(self._downloader.params, field_preference) if self.get_param('verbose', False): @@ -1992,9 +1994,7 @@ def _parse_m3u8_formats_and_subtitles( if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return formats, subtitles - if (not self.get_param('allow_unplayable_formats') - and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay - return formats, subtitles + has_drm = re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url) @@ -2040,6 +2040,7 @@ def _extract_m3u8_playlist_indices(*args, **kwargs): 'protocol': entry_protocol, 'preference': preference, 'quality': quality, + 'has_drm': has_drm, } for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)] return formats, subtitles @@ -2573,8 +2574,6 @@ def extract_Initialization(source): extract_Initialization(segment_template) return ms_info - skip_unplayable = not self.get_param('allow_unplayable_formats') - mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats = [] subtitles = {} @@ -2585,12 +2584,8 @@ def extract_Initialization(source): 'timescale': 1, }) for adaptation_set in period.findall(_add_ns('AdaptationSet')): - if skip_unplayable and is_drm_protected(adaptation_set): - continue adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) for representation in adaptation_set.findall(_add_ns('Representation')): - if skip_unplayable and is_drm_protected(representation): - continue representation_attrib = adaptation_set.attrib.copy() representation_attrib.update(representation.attrib) # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory @@ -2662,6 +2657,8 @@ def extract_Initialization(source): 'acodec': 'none', 'vcodec': 'none', } + if is_drm_protected(adaptation_set) or is_drm_protected(representation): + f['has_drm'] = True representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) def prepare_template(template_name, identifiers): @@ -2848,9 +2845,6 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): """ if ism_doc.get('IsLive') == 'TRUE': return [], {} - if (not self.get_param('allow_unplayable_formats') - and ism_doc.find('Protection') is not None): - return [], {} duration = int(ism_doc.attrib['Duration']) timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 @@ -2941,6 +2935,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): 'acodec': 'none' if stream_type == 'video' else fourcc, 'protocol': 'ism', 'fragments': fragments, + 'has_drm': ism_doc.find('Protection') is not None, '_download_params': { 'stream_type': stream_type, 'duration': duration, diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index 8f2706736e..352951e201 100644 --- a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -130,7 +130,7 @@ def _real_extract(self, url): formats.extend(self._parse_smil_formats( smil, smil_url, video_id, namespace)) if not formats and video.get('drm'): - self.raise_no_formats('This video is DRM protected.', expected=True) + self.report_drm(video_id) self._sort_formats(formats) subtitles = {} diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index 216e713114..2c9d28d2e2 100644 --- a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -176,8 +176,8 @@ def _real_extract(self, url): 'width': mfs_info['width'], 'height': mfs_info['height'], }) - if not formats and has_drm and not ignore_no_formats: - raise ExtractorError('The video is DRM protected', expected=True) + if not formats and has_drm: + self.report_drm(video_id) self._sort_formats(formats) description = media.get('Description') diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py index dd8fae5366..0cb3aa31bf 100644 --- a/yt_dlp/extractor/globo.py +++ b/yt_dlp/extractor/globo.py @@ -97,7 +97,7 @@ def _real_extract(self, url): 'http://api.globovideos.com/videos/%s/playlist' % video_id, video_id)['videos'][0] if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True: - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(video_id) title = video['title'] diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index e00e58646d..8d8a8bd75e 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -182,7 +182,8 @@ def _real_extract(self, url): title = video_data['title'] if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(video_id) + headers = {'Referer': 'https://www.hotstar.com/in'} formats = [] subs = {} diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index d1281167a7..4aaf21a120 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -4,7 +4,6 @@ from .common import InfoExtractor from ..utils import ( - ExtractorError, float_or_none, int_or_none, parse_iso8601, @@ -35,7 +34,7 @@ def _real_extract(self, url): if (not self.get_param('allow_unplayable_formats') and try_get(content_package, lambda x: x['Constraints']['Security']['Type'])): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(content_id) manifest_base_url = content_package_url + 'manifest.' formats = [] diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index 43b9c4e7ea..0ee450cc5c 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -66,11 +66,12 @@ def _real_extract(self, url): video_data = common_data['video'] - if not self.get_param('allow_unplayable_formats') and video_data.get('drm'): - raise ExtractorError('This video is DRM protected.', expected=True) - brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId'] video_id = compat_str(video_data.get('id') or brightcove_id) + + if not self.get_param('allow_unplayable_formats') and video_data.get('drm'): + self.report_drm(video_id) + title = common_data['name'] thumbnails = [{ diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py index 6984b76a9e..ed547d04b3 100644 --- a/yt_dlp/extractor/npo.py +++ b/yt_dlp/extractor/npo.py @@ -247,8 +247,7 @@ def _get_info(self, url, video_id): if not formats: if not self.get_param('allow_unplayable_formats') and drm: - self.raise_no_formats('This video is DRM protected.', expected=True) - return + self.report_drm(video_id) self._sort_formats(formats) diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py index 707146be7e..e89bbfd279 100644 --- a/yt_dlp/extractor/prosiebensat1.py +++ b/yt_dlp/extractor/prosiebensat1.py @@ -35,7 +35,7 @@ def _extract_video_info(self, url, clip_id): })[0] if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True: - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(clip_id) formats = [] if self._ACCESS_ID: diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 9f5bed4c41..27cd018012 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -281,7 +281,7 @@ def _real_extract(self, url): (lambda x: x['rights_management']['rights']['drm'], lambda x: x['program_info']['rights_management']['rights']['drm']), dict): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(video_id) title = media['name'] video = media['video'] diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index 6a78441ef8..d9cf39d712 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -202,7 +202,7 @@ def pv(name): if not formats: if (not self.get_param('allow_unplayable_formats') and xpath_text(video_xml, './Clip/DRM', default=None)): - self.raise_no_formats('This video is DRM protected.', expected=True) + self.report_drm(video_id) ns_st_cds = pv('ns_st_cds') if ns_st_cds != 'free': raise ExtractorError('This video is %s.' % ns_st_cds, expected=True) diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index dc590129c3..42de41a119 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -119,7 +119,7 @@ def _real_extract(self, url): 'playout/new/url/' + video_id, video_id)['playout'] if not self.get_param('allow_unplayable_formats') and playout.get('drm'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(video_id) formats = self._extract_m3u8_formats(re.sub( # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py index 60181f06d2..c3ed442753 100644 --- a/yt_dlp/extractor/sonyliv.py +++ b/yt_dlp/extractor/sonyliv.py @@ -83,7 +83,7 @@ def _real_extract(self, url): content = self._call_api( '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id) if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(video_id) dash_url = content['videoURL'] headers = { 'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000) diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py index 3f4f6e827a..eb873495fc 100644 --- a/yt_dlp/extractor/toggle.py +++ b/yt_dlp/extractor/toggle.py @@ -155,8 +155,7 @@ def _real_extract(self, url): for meta in (info.get('Metas') or []): if (not self.get_param('allow_unplayable_formats') and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'): - self.raise_no_formats( - 'This video is DRM protected.', expected=True) + self.report_drm(video_id) # Most likely because geo-blocked if no formats and no DRM self._sort_formats(formats) diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index f3480de56d..e9da11f865 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -103,7 +103,7 @@ def _real_extract(self, url): 'filesize': int_or_none(item.get('fileSize')), }) if not formats and data.get('drmProtected'): - self.raise_no_formats('This video is DRM protected.', expected=True) + self.report_drm(video_id) self._sort_formats(formats) thumbnails = [{ diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 2f5ea5c0b4..571448bf21 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -247,8 +247,7 @@ def _real_extract(self, url): formats = [] if stream_meta.get('is_drm'): if not self.get_param('allow_unplayable_formats'): - self.raise_no_formats( - 'This video is DRM protected.', expected=True) + self.report_drm(video_id) if stream_meta.get('is_premium'): sources = self._download_json( 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id, diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index baa87e27a2..c956d616ed 100644 --- a/yt_dlp/extractor/wakanim.py +++ b/yt_dlp/extractor/wakanim.py @@ -3,7 +3,6 @@ from .common import InfoExtractor from ..utils import ( - ExtractorError, merge_dicts, urljoin, ) @@ -47,7 +46,7 @@ def _real_extract(self, url): r'encryption%3D(c(?:enc|bc(?:s-aapl)?))', m3u8_url, 'encryption', default=None) if encryption in ('cenc', 'cbcs-aapl'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_drm(video_id) formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py index 0f1d08da35..9ff4523db6 100644 --- a/yt_dlp/extractor/wat.py +++ b/yt_dlp/extractor/wat.py @@ -92,7 +92,7 @@ def extract_formats(manifest_urls): extract_formats({delivery.get('format'): delivery.get('url')}) if not formats: if delivery.get('drm'): - self.raise_no_formats('This video is DRM protected.', expected=True) + self.report_drm(video_id) manifest_urls = self._download_json( 'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False) if manifest_urls: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c686d828c7..375eca8f84 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2793,8 +2793,7 @@ def feed_entry(name): if not formats: if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')): - self.raise_no_formats( - 'This video is DRM protected.', expected=True) + self.report_drm(video_id) pemr = get_first( playability_statuses, ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {} diff --git a/yt_dlp/options.py b/yt_dlp/options.py index ef821eb11a..56ab001bef 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -580,13 +580,11 @@ def _dict_from_options_callback( video_format.add_option( '--allow-unplayable-formats', action='store_true', dest='allow_unplayable_formats', default=False, - help=( - 'Allow unplayable formats to be listed and downloaded. ' - 'All video post-processing will also be turned off')) + help=optparse.SUPPRESS_HELP) video_format.add_option( '--no-allow-unplayable-formats', action='store_false', dest='allow_unplayable_formats', - help='Do not allow unplayable formats to be listed or downloaded (default)') + help=optparse.SUPPRESS_HELP) subtitles = optparse.OptionGroup(parser, 'Subtitle Options') subtitles.add_option(