[extractor] Better error message for DRM (#729)

Closes #636
2024-11-27 06:10:12 +01:00 · 2021-08-23 01:38:38 +05:30 · 2021-08-23 01:38:38 +05:30 · 88acdbc269
commit 88acdbc269
parent 9b5fa9ee7c
24 changed files with 66 additions and 62 deletions
--- a/README.md
+++ b/README.md
@ -668,11 +668,6 @@ ## Video Format Options:
                                     bestvideo+bestaudio), output to given
                                     container format. One of mkv, mp4, ogg,
                                     webm, flv. Ignored if no merge is required
-    --allow-unplayable-formats       Allow unplayable formats to be listed and
-                                     downloaded. All video post-processing will
-                                     also be turned off
-    --no-allow-unplayable-formats    Do not allow unplayable formats to be
-                                     listed or downloaded (default)

 ## Subtitle Options:
    --write-subs                     Write subtitle file
@ -1470,6 +1465,8 @@ #### Developer options

    --test                           Download only part of video for testing extractors
    --youtube-print-sig-code         For testing youtube signatures
+    --allow-unplayable-formats       Allow listing/downloading unplayable formats
+    --no-allow-unplayable-formats    Do not allow unplayable formats (default)


 #### Old aliases
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -497,6 +497,12 @@ def __init__(self, params=None, auto_init=True):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

+        if self.params.get('allow_unplayable_formats'):
+            self.report_warning(
+                'You have asked for unplayable formats to be listed/downloaded. '
+                'This is a developer option intended for debugging. '
+                'If you experience any issues while using this option, DO NOT open a bug report')
+
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
@ -827,6 +833,14 @@ def report_file_delete(self, file_name):
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

+    def raise_no_formats(self, has_drm=False, forced=False):
+        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
+        expected = self.params.get('ignore_no_formats_error')
+        if forced or not expected:
+            raise ExtractorError(msg, expected=has_drm or expected)
+        else:
+            self.report_warning(msg)
+
    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
@ -2151,11 +2165,12 @@ def sanitize_numeric_fields(info):
        else:
            formats = info_dict['formats']

+        if not self.params.get('allow_unplayable_formats'):
+            formats = [f for f in formats if not f.get('has_drm')]
+        info_dict['__has_drm'] = len(info_dict.get('formats') or ['']) > len(formats)
+
        if not formats:
-            if not self.params.get('ignore_no_formats_error'):
-                raise ExtractorError('No video formats found!')
-            else:
-                self.report_warning('No video formats found!')
+            self.raise_no_formats(info_dict.get('__has_drm'))

        def is_wellformed(f):
            url = f.get('url')
@ -2219,7 +2234,7 @@ def is_wellformed(f):

        # TODO Central sorting goes here

-        if formats and formats[0] is not info_dict:
+        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
@ -2232,7 +2247,8 @@ def is_wellformed(f):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
-                raise ExtractorError('No video formats found', expected=True)
+                self.to_screen('%s has no formats' % info_dict['id'])
+            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
@ -2410,6 +2426,8 @@ def print_optional(field):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

    def dl(self, name, info, subtitle=False, test=False):
+        if not info.get('url'):
+            self.raise_no_formats(info.get('__has_drm'), forced=True)

        if test:
            verbose = self.params.get('verbose')
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@ -549,7 +549,7 @@ def build_format_id(kind):
                    error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
            elif (not self.get_param('allow_unplayable_formats')
                    and sources and num_drm_sources == len(sources)):
-                raise ExtractorError('This video is DRM protected.', expected=True)
+                self.report_drm(video_id)

        self._sort_formats(formats)

--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@ -147,9 +147,6 @@ def _real_extract(self, url):
                is_live = item.get('type') == 'LIVE'
                formats = []
                for format_id, stream_url in item.get('streamUrls', {}).items():
-                    if (not self.get_param('allow_unplayable_formats')
-                            and 'drmOnly=true' in stream_url):
-                        continue
                    if 'playerType=flash' in stream_url:
                        stream_formats = self._extract_m3u8_formats(
                            stream_url, playlist_id, 'mp4', 'm3u8_native',
@ -158,6 +155,9 @@ def _real_extract(self, url):
                        stream_formats = self._extract_mpd_formats(
                            stream_url, playlist_id,
                            mpd_id='dash-%s' % format_id, fatal=False)
+                    if 'drmOnly=true' in stream_url:
+                        for f in stream_formats:
+                            f['has_drm'] = True
                    # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
                    if format_id == 'audioDescription':
                        for f in stream_formats:
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -203,6 +203,7 @@ class InfoExtractor(object):
                                 width : height ratio as float.
                    * no_resume  The server does not support resuming the
                                 (HTTP or RTMP) download. Boolean.
+                    * has_drm    The format has DRM and cannot be downloaded. Boolean
                    * downloader_options  A dictionary of downloader options as
                                 described in FileDownloader
                    RTMP formats can also have the additional fields: page_url,
@ -1024,6 +1025,9 @@ def get_param(self, name, default=None, *args, **kwargs):
            return self._downloader.params.get(name, default, *args, **kwargs)
        return default

+    def report_drm(self, video_id, partial=False):
+        self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
+
    def report_extraction(self, id_or_name):
        """Report information extraction."""
        self.to_screen('%s: Extracting information' % id_or_name)
@ -1752,9 +1756,7 @@ def calculate_preference(self, format):

    def _sort_formats(self, formats, field_preference=[]):
        if not formats:
-            if self.get_param('ignore_no_formats_error'):
            return
-            raise ExtractorError('No video formats found')
        format_sort = self.FormatSort()  # params and to_screen are taken from the downloader
        format_sort.evaluate_params(self._downloader.params, field_preference)
        if self.get_param('verbose', False):
@ -1992,9 +1994,7 @@ def _parse_m3u8_formats_and_subtitles(
        if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
            return formats, subtitles

-        if (not self.get_param('allow_unplayable_formats')
-                and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)):  # Apple FairPlay
-            return formats, subtitles
+        has_drm = bool(re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc))  # Apple FairPlay; coerce Match/None to a real Boolean

        def format_url(url):
            return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
@ -2040,6 +2040,7 @@ def _extract_m3u8_playlist_indices(*args, **kwargs):
                'protocol': entry_protocol,
                'preference': preference,
                'quality': quality,
+                'has_drm': has_drm,
            } for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]

            return formats, subtitles
@ -2573,8 +2574,6 @@ def extract_Initialization(source):
                        extract_Initialization(segment_template)
            return ms_info

-        skip_unplayable = not self.get_param('allow_unplayable_formats')
-
        mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
        formats = []
        subtitles = {}
@ -2585,12 +2584,8 @@ def extract_Initialization(source):
                'timescale': 1,
            })
            for adaptation_set in period.findall(_add_ns('AdaptationSet')):
-                if skip_unplayable and is_drm_protected(adaptation_set):
-                    continue
                adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
                for representation in adaptation_set.findall(_add_ns('Representation')):
-                    if skip_unplayable and is_drm_protected(representation):
-                        continue
                    representation_attrib = adaptation_set.attrib.copy()
                    representation_attrib.update(representation.attrib)
                    # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
@ -2662,6 +2657,8 @@ def extract_Initialization(source):
                            'acodec': 'none',
                            'vcodec': 'none',
                        }
+                    if is_drm_protected(adaptation_set) or is_drm_protected(representation):
+                        f['has_drm'] = True
                    representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)

                    def prepare_template(template_name, identifiers):
@ -2848,9 +2845,6 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
        """
        if ism_doc.get('IsLive') == 'TRUE':
            return [], {}
-        if (not self.get_param('allow_unplayable_formats')
-                and ism_doc.find('Protection') is not None):
-            return [], {}

        duration = int(ism_doc.attrib['Duration'])
        timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
@ -2941,6 +2935,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
                        'acodec': 'none' if stream_type == 'video' else fourcc,
                        'protocol': 'ism',
                        'fragments': fragments,
+                        'has_drm': ism_doc.find('Protection') is not None,
                        '_download_params': {
                            'stream_type': stream_type,
                            'duration': duration,
--- a/yt_dlp/extractor/corus.py
+++ b/yt_dlp/extractor/corus.py
@ -130,7 +130,7 @@ def _real_extract(self, url):
            formats.extend(self._parse_smil_formats(
                smil, smil_url, video_id, namespace))
        if not formats and video.get('drm'):
-            self.raise_no_formats('This video is DRM protected.', expected=True)
+            self.report_drm(video_id)
        self._sort_formats(formats)

        subtitles = {}
--- a/yt_dlp/extractor/crackle.py
+++ b/yt_dlp/extractor/crackle.py
@ -176,8 +176,8 @@ def _real_extract(self, url):
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
-        if not formats and has_drm and not ignore_no_formats:
-            raise ExtractorError('The video is DRM protected', expected=True)
+        if not formats and has_drm:
+            self.report_drm(video_id)
        self._sort_formats(formats)

        description = media.get('Description')
--- a/yt_dlp/extractor/globo.py
+++ b/yt_dlp/extractor/globo.py
@ -97,7 +97,7 @@ def _real_extract(self, url):
            'http://api.globovideos.com/videos/%s/playlist' % video_id,
            video_id)['videos'][0]
        if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_drm(video_id)

        title = video['title']

--- a/yt_dlp/extractor/hotstar.py
+++ b/yt_dlp/extractor/hotstar.py
@ -182,7 +182,8 @@ def _real_extract(self, url):
        title = video_data['title']

        if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_drm(video_id)
+
        headers = {'Referer': 'https://www.hotstar.com/in'}
        formats = []
        subs = {}
--- a/yt_dlp/extractor/ninecninemedia.py
+++ b/yt_dlp/extractor/ninecninemedia.py
@ -4,7 +4,6 @@

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
    float_or_none,
    int_or_none,
    parse_iso8601,
@ -35,7 +34,7 @@ def _real_extract(self, url):

        if (not self.get_param('allow_unplayable_formats')
                and try_get(content_package, lambda x: x['Constraints']['Security']['Type'])):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_drm(content_id)

        manifest_base_url = content_package_url + 'manifest.'
        formats = []
--- a/yt_dlp/extractor/ninenow.py
+++ b/yt_dlp/extractor/ninenow.py
@ -66,11 +66,12 @@ def _real_extract(self, url):

        video_data = common_data['video']

-        if not self.get_param('allow_unplayable_formats') and video_data.get('drm'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
-
        brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']
        video_id = compat_str(video_data.get('id') or brightcove_id)
+
+        if not self.get_param('allow_unplayable_formats') and video_data.get('drm'):
+            self.report_drm(video_id)
+
        title = common_data['name']

        thumbnails = [{
--- a/yt_dlp/extractor/npo.py
+++ b/yt_dlp/extractor/npo.py
@ -247,8 +247,7 @@ def _get_info(self, url, video_id):

        if not formats:
            if not self.get_param('allow_unplayable_formats') and drm:
-                self.raise_no_formats('This video is DRM protected.', expected=True)
-            return
+                self.report_drm(video_id)

        self._sort_formats(formats)

--- a/yt_dlp/extractor/prosiebensat1.py
+++ b/yt_dlp/extractor/prosiebensat1.py
@ -35,7 +35,7 @@ def _extract_video_info(self, url, clip_id):
            })[0]

        if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_drm(clip_id)

        formats = []
        if self._ACCESS_ID:
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@ -281,7 +281,7 @@ def _real_extract(self, url):
                    (lambda x: x['rights_management']['rights']['drm'],
                     lambda x: x['program_info']['rights_management']['rights']['drm']),
                    dict):
-                raise ExtractorError('This video is DRM protected.', expected=True)
+                self.report_drm(video_id)

        title = media['name']
        video = media['video']
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@ -202,7 +202,7 @@ def pv(name):
        if not formats:
            if (not self.get_param('allow_unplayable_formats')
                    and xpath_text(video_xml, './Clip/DRM', default=None)):
-                self.raise_no_formats('This video is DRM protected.', expected=True)
+                self.report_drm(video_id)
            ns_st_cds = pv('ns_st_cds')
            if ns_st_cds != 'free':
                raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@ -119,7 +119,7 @@ def _real_extract(self, url):
            'playout/new/url/' + video_id, video_id)['playout']

        if not self.get_param('allow_unplayable_formats') and playout.get('drm'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_drm(video_id)

        formats = self._extract_m3u8_formats(re.sub(
            # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@ -83,7 +83,7 @@ def _real_extract(self, url):
        content = self._call_api(
            '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
        if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_drm(video_id)
        dash_url = content['videoURL']
        headers = {
            'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
--- a/yt_dlp/extractor/toggle.py
+++ b/yt_dlp/extractor/toggle.py
@ -155,8 +155,7 @@ def _real_extract(self, url):
            for meta in (info.get('Metas') or []):
                if (not self.get_param('allow_unplayable_formats')
                        and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'):
-                    self.raise_no_formats(
-                        'This video is DRM protected.', expected=True)
+                    self.report_drm(video_id)
            # Most likely because geo-blocked if no formats and no DRM
        self._sort_formats(formats)

--- a/yt_dlp/extractor/tv2.py
+++ b/yt_dlp/extractor/tv2.py
@ -103,7 +103,7 @@ def _real_extract(self, url):
                        'filesize': int_or_none(item.get('fileSize')),
                    })
        if not formats and data.get('drmProtected'):
-            self.raise_no_formats('This video is DRM protected.', expected=True)
+            self.report_drm(video_id)
        self._sort_formats(formats)

        thumbnails = [{
--- a/yt_dlp/extractor/vidio.py
+++ b/yt_dlp/extractor/vidio.py
@ -247,8 +247,7 @@ def _real_extract(self, url):
        formats = []
        if stream_meta.get('is_drm'):
            if not self.get_param('allow_unplayable_formats'):
-                self.raise_no_formats(
-                    'This video is DRM protected.', expected=True)
+                self.report_drm(video_id)
        if stream_meta.get('is_premium'):
            sources = self._download_json(
                'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
--- a/yt_dlp/extractor/wakanim.py
+++ b/yt_dlp/extractor/wakanim.py
@ -3,7 +3,6 @@

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
    merge_dicts,
    urljoin,
 )
@ -47,7 +46,7 @@ def _real_extract(self, url):
                r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
                m3u8_url, 'encryption', default=None)
            if encryption in ('cenc', 'cbcs-aapl'):
-                raise ExtractorError('This video is DRM protected.', expected=True)
+                self.report_drm(video_id)

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
--- a/yt_dlp/extractor/wat.py
+++ b/yt_dlp/extractor/wat.py
@ -92,7 +92,7 @@ def extract_formats(manifest_urls):
        extract_formats({delivery.get('format'): delivery.get('url')})
        if not formats:
            if delivery.get('drm'):
-                self.raise_no_formats('This video is DRM protected.', expected=True)
+                self.report_drm(video_id)
            manifest_urls = self._download_json(
                'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False)
            if manifest_urls:
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -2793,8 +2793,7 @@ def feed_entry(name):

        if not formats:
            if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
-                self.raise_no_formats(
-                    'This video is DRM protected.', expected=True)
+                self.report_drm(video_id)
            pemr = get_first(
                playability_statuses,
                ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -580,13 +580,11 @@ def _dict_from_options_callback(
    video_format.add_option(
        '--allow-unplayable-formats',
        action='store_true', dest='allow_unplayable_formats', default=False,
-        help=(
-            'Allow unplayable formats to be listed and downloaded. '
-            'All video post-processing will also be turned off'))
+        help=optparse.SUPPRESS_HELP)
    video_format.add_option(
        '--no-allow-unplayable-formats',
        action='store_false', dest='allow_unplayable_formats',
-        help='Do not allow unplayable formats to be listed or downloaded (default)')
+        help=optparse.SUPPRESS_HELP)

    subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
    subtitles.add_option(