[extractor] Better error message for DRM (#729)

Closes #636
This commit is contained in:
pukkandan 2021-08-23 01:38:38 +05:30 committed by GitHub
parent 9b5fa9ee7c
commit 88acdbc269
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 66 additions and 62 deletions

View File

@ -668,11 +668,6 @@ ## Video Format Options:
bestvideo+bestaudio), output to given
container format. One of mkv, mp4, ogg,
webm, flv. Ignored if no merge is required
--allow-unplayable-formats Allow unplayable formats to be listed and
downloaded. All video post-processing will
also be turned off
--no-allow-unplayable-formats Do not allow unplayable formats to be
listed or downloaded (default)
## Subtitle Options:
--write-subs Write subtitle file
@ -1470,6 +1465,8 @@ #### Developer options
--test Download only part of video for testing extractors
--youtube-print-sig-code For testing youtube signatures
--allow-unplayable-formats List unplayable formats also
--no-allow-unplayable-formats Default
#### Old aliases

View File

@ -497,6 +497,12 @@ def __init__(self, params=None, auto_init=True):
self.report_warning(
'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
if self.params.get('allow_unplayable_formats'):
self.report_warning(
'You have asked for unplayable formats to be listed/downloaded. '
'This is a developer option intended for debugging. '
'If you experience any issues while using this option, DO NOT open a bug report')
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
@ -827,6 +833,14 @@ def report_file_delete(self, file_name):
except UnicodeEncodeError:
self.to_screen('Deleting existing file')
def raise_no_formats(self, has_drm=False, forced=False):
    """Signal that no usable formats are available.

    The message says the video is DRM protected when ``has_drm`` is truthy,
    otherwise that no video formats were found.

    When the ``ignore_no_formats_error`` param is set and ``forced`` is
    falsy, the message is only reported as a warning. In every other case
    an ExtractorError is raised; it is flagged as expected when either
    ``has_drm`` or the ``ignore_no_formats_error`` param is truthy.
    """
    if has_drm:
        msg = 'This video is DRM protected'
    else:
        msg = 'No video formats found!'

    ignore_error = self.params.get('ignore_no_formats_error')
    if ignore_error and not forced:
        # The user opted in to tolerating missing formats: warn and carry on
        self.report_warning(msg)
        return
    raise ExtractorError(msg, expected=has_drm or ignore_error)
def parse_outtmpl(self):
outtmpl_dict = self.params.get('outtmpl', {})
if not isinstance(outtmpl_dict, dict):
@ -2151,11 +2165,12 @@ def sanitize_numeric_fields(info):
else:
formats = info_dict['formats']
if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')]
info_dict['__has_drm'] = len(info_dict.get('formats') or ['']) > len(formats)
if not formats:
if not self.params.get('ignore_no_formats_error'):
raise ExtractorError('No video formats found!')
else:
self.report_warning('No video formats found!')
self.raise_no_formats(info_dict.get('__has_drm'))
def is_wellformed(f):
url = f.get('url')
@ -2219,7 +2234,7 @@ def is_wellformed(f):
# TODO Central sorting goes here
if formats and formats[0] is not info_dict:
if not formats or formats[0] is not info_dict:
# only set the 'formats' fields if the original info_dict lists them
# otherwise we end up with a circular reference, the first (and unique)
# element in the 'formats' field in info_dict is info_dict itself,
@ -2232,7 +2247,8 @@ def is_wellformed(f):
self.list_thumbnails(info_dict)
if self.params.get('listformats'):
if not info_dict.get('formats') and not info_dict.get('url'):
raise ExtractorError('No video formats found', expected=True)
self.to_screen('%s has no formats' % info_dict['id'])
else:
self.list_formats(info_dict)
if self.params.get('listsubtitles'):
if 'automatic_captions' in info_dict:
@ -2410,6 +2426,8 @@ def print_optional(field):
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
def dl(self, name, info, subtitle=False, test=False):
if not info.get('url'):
self.raise_no_formats(info.get('__has_drm'), forced=True)
if test:
verbose = self.params.get('verbose')

View File

@ -549,7 +549,7 @@ def build_format_id(kind):
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
elif (not self.get_param('allow_unplayable_formats')
and sources and num_drm_sources == len(sources)):
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(video_id)
self._sort_formats(formats)

View File

@ -147,9 +147,6 @@ def _real_extract(self, url):
is_live = item.get('type') == 'LIVE'
formats = []
for format_id, stream_url in item.get('streamUrls', {}).items():
if (not self.get_param('allow_unplayable_formats')
and 'drmOnly=true' in stream_url):
continue
if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', 'm3u8_native',
@ -158,6 +155,9 @@ def _real_extract(self, url):
stream_formats = self._extract_mpd_formats(
stream_url, playlist_id,
mpd_id='dash-%s' % format_id, fatal=False)
if 'drmOnly=true' in stream_url:
for f in stream_formats:
f['has_drm'] = True
# See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
if format_id == 'audioDescription':
for f in stream_formats:

View File

@ -203,6 +203,7 @@ class InfoExtractor(object):
width : height ratio as float.
* no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean.
* has_drm The format has DRM and cannot be downloaded. Boolean
* downloader_options A dictionary of downloader options as
described in FileDownloader
RTMP formats can also have the additional fields: page_url,
@ -1024,6 +1025,9 @@ def get_param(self, name, default=None, *args, **kwargs):
return self._downloader.params.get(name, default, *args, **kwargs)
return default
def report_drm(self, video_id, partial=False):
    """Report that the video identified by ``video_id`` is DRM protected.

    Delegates to ``self.raise_no_formats`` with a fixed message, marking the
    condition as expected. ``partial`` is accepted but not used here.
    """
    drm_message = 'This video is DRM protected'
    self.raise_no_formats(
        drm_message,
        expected=True,
        video_id=video_id,
    )
def report_extraction(self, id_or_name):
"""Report information extraction."""
self.to_screen('%s: Extracting information' % id_or_name)
@ -1752,9 +1756,7 @@ def calculate_preference(self, format):
def _sort_formats(self, formats, field_preference=[]):
if not formats:
if self.get_param('ignore_no_formats_error'):
return
raise ExtractorError('No video formats found')
format_sort = self.FormatSort() # params and to_screen are taken from the downloader
format_sort.evaluate_params(self._downloader.params, field_preference)
if self.get_param('verbose', False):
@ -1992,9 +1994,7 @@ def _parse_m3u8_formats_and_subtitles(
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
return formats, subtitles
if (not self.get_param('allow_unplayable_formats')
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
return formats, subtitles
has_drm = re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)
def format_url(url):
return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
@ -2040,6 +2040,7 @@ def _extract_m3u8_playlist_indices(*args, **kwargs):
'protocol': entry_protocol,
'preference': preference,
'quality': quality,
'has_drm': has_drm,
} for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
return formats, subtitles
@ -2573,8 +2574,6 @@ def extract_Initialization(source):
extract_Initialization(segment_template)
return ms_info
skip_unplayable = not self.get_param('allow_unplayable_formats')
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
subtitles = {}
@ -2585,12 +2584,8 @@ def extract_Initialization(source):
'timescale': 1,
})
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
if skip_unplayable and is_drm_protected(adaptation_set):
continue
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')):
if skip_unplayable and is_drm_protected(representation):
continue
representation_attrib = adaptation_set.attrib.copy()
representation_attrib.update(representation.attrib)
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
@ -2662,6 +2657,8 @@ def extract_Initialization(source):
'acodec': 'none',
'vcodec': 'none',
}
if is_drm_protected(adaptation_set) or is_drm_protected(representation):
f['has_drm'] = True
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
def prepare_template(template_name, identifiers):
@ -2848,9 +2845,6 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
"""
if ism_doc.get('IsLive') == 'TRUE':
return [], {}
if (not self.get_param('allow_unplayable_formats')
and ism_doc.find('Protection') is not None):
return [], {}
duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
@ -2941,6 +2935,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
'acodec': 'none' if stream_type == 'video' else fourcc,
'protocol': 'ism',
'fragments': fragments,
'has_drm': ism_doc.find('Protection') is not None,
'_download_params': {
'stream_type': stream_type,
'duration': duration,

View File

@ -130,7 +130,7 @@ def _real_extract(self, url):
formats.extend(self._parse_smil_formats(
smil, smil_url, video_id, namespace))
if not formats and video.get('drm'):
self.raise_no_formats('This video is DRM protected.', expected=True)
self.report_drm(video_id)
self._sort_formats(formats)
subtitles = {}

View File

@ -176,8 +176,8 @@ def _real_extract(self, url):
'width': mfs_info['width'],
'height': mfs_info['height'],
})
if not formats and has_drm and not ignore_no_formats:
raise ExtractorError('The video is DRM protected', expected=True)
if not formats and has_drm:
self.report_drm(video_id)
self._sort_formats(formats)
description = media.get('Description')

View File

@ -97,7 +97,7 @@ def _real_extract(self, url):
'http://api.globovideos.com/videos/%s/playlist' % video_id,
video_id)['videos'][0]
if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(video_id)
title = video['title']

View File

@ -182,7 +182,8 @@ def _real_extract(self, url):
title = video_data['title']
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(video_id)
headers = {'Referer': 'https://www.hotstar.com/in'}
formats = []
subs = {}

View File

@ -4,7 +4,6 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
@ -35,7 +34,7 @@ def _real_extract(self, url):
if (not self.get_param('allow_unplayable_formats')
and try_get(content_package, lambda x: x['Constraints']['Security']['Type'])):
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(content_id)
manifest_base_url = content_package_url + 'manifest.'
formats = []

View File

@ -66,11 +66,12 @@ def _real_extract(self, url):
video_data = common_data['video']
if not self.get_param('allow_unplayable_formats') and video_data.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']
video_id = compat_str(video_data.get('id') or brightcove_id)
if not self.get_param('allow_unplayable_formats') and video_data.get('drm'):
self.report_drm(video_id)
title = common_data['name']
thumbnails = [{

View File

@ -247,8 +247,7 @@ def _get_info(self, url, video_id):
if not formats:
if not self.get_param('allow_unplayable_formats') and drm:
self.raise_no_formats('This video is DRM protected.', expected=True)
return
self.report_drm(video_id)
self._sort_formats(formats)

View File

@ -35,7 +35,7 @@ def _extract_video_info(self, url, clip_id):
})[0]
if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(clip_id)
formats = []
if self._ACCESS_ID:

View File

@ -281,7 +281,7 @@ def _real_extract(self, url):
(lambda x: x['rights_management']['rights']['drm'],
lambda x: x['program_info']['rights_management']['rights']['drm']),
dict):
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(video_id)
title = media['name']
video = media['video']

View File

@ -202,7 +202,7 @@ def pv(name):
if not formats:
if (not self.get_param('allow_unplayable_formats')
and xpath_text(video_xml, './Clip/DRM', default=None)):
self.raise_no_formats('This video is DRM protected.', expected=True)
self.report_drm(video_id)
ns_st_cds = pv('ns_st_cds')
if ns_st_cds != 'free':
raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)

View File

@ -119,7 +119,7 @@ def _real_extract(self, url):
'playout/new/url/' + video_id, video_id)['playout']
if not self.get_param('allow_unplayable_formats') and playout.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(video_id)
formats = self._extract_m3u8_formats(re.sub(
# https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html

View File

@ -83,7 +83,7 @@ def _real_extract(self, url):
content = self._call_api(
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(video_id)
dash_url = content['videoURL']
headers = {
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)

View File

@ -155,8 +155,7 @@ def _real_extract(self, url):
for meta in (info.get('Metas') or []):
if (not self.get_param('allow_unplayable_formats')
and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'):
self.raise_no_formats(
'This video is DRM protected.', expected=True)
self.report_drm(video_id)
# Most likely because geo-blocked if no formats and no DRM
self._sort_formats(formats)

View File

@ -103,7 +103,7 @@ def _real_extract(self, url):
'filesize': int_or_none(item.get('fileSize')),
})
if not formats and data.get('drmProtected'):
self.raise_no_formats('This video is DRM protected.', expected=True)
self.report_drm(video_id)
self._sort_formats(formats)
thumbnails = [{

View File

@ -247,8 +247,7 @@ def _real_extract(self, url):
formats = []
if stream_meta.get('is_drm'):
if not self.get_param('allow_unplayable_formats'):
self.raise_no_formats(
'This video is DRM protected.', expected=True)
self.report_drm(video_id)
if stream_meta.get('is_premium'):
sources = self._download_json(
'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,

View File

@ -3,7 +3,6 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
merge_dicts,
urljoin,
)
@ -47,7 +46,7 @@ def _real_extract(self, url):
r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
m3u8_url, 'encryption', default=None)
if encryption in ('cenc', 'cbcs-aapl'):
raise ExtractorError('This video is DRM protected.', expected=True)
self.report_drm(video_id)
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',

View File

@ -92,7 +92,7 @@ def extract_formats(manifest_urls):
extract_formats({delivery.get('format'): delivery.get('url')})
if not formats:
if delivery.get('drm'):
self.raise_no_formats('This video is DRM protected.', expected=True)
self.report_drm(video_id)
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False)
if manifest_urls:

View File

@ -2793,8 +2793,7 @@ def feed_entry(name):
if not formats:
if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
self.raise_no_formats(
'This video is DRM protected.', expected=True)
self.report_drm(video_id)
pemr = get_first(
playability_statuses,
('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

View File

@ -580,13 +580,11 @@ def _dict_from_options_callback(
video_format.add_option(
'--allow-unplayable-formats',
action='store_true', dest='allow_unplayable_formats', default=False,
help=(
'Allow unplayable formats to be listed and downloaded. '
'All video post-processing will also be turned off'))
help=optparse.SUPPRESS_HELP)
video_format.add_option(
'--no-allow-unplayable-formats',
action='store_false', dest='allow_unplayable_formats',
help='Do not allow unplayable formats to be listed or downloaded (default)')
help=optparse.SUPPRESS_HELP)
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
subtitles.add_option(