From aa56061627f9871b4793414b71a26976befd3a9c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 13 Jun 2018 16:46:59 +0100 Subject: [PATCH] [discoverynetworks] Add support for disco-api videos (closes #16724) --- youtube_dl/extractor/discoverynetworks.py | 19 ++- youtube_dl/extractor/dplay.py | 137 +++++++++++----------- 2 files changed, 87 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py index b6653784cc..fba1ef2218 100644 --- a/youtube_dl/extractor/discoverynetworks.py +++ b/youtube_dl/extractor/discoverynetworks.py @@ -3,8 +3,8 @@ import re -from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE +from .dplay import DPlayIE from ..compat import ( compat_parse_qs, compat_urlparse, @@ -12,8 +12,13 @@ from ..utils import smuggle_url -class DiscoveryNetworksDeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))' +class DiscoveryNetworksDeIE(DPlayIE): + _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/ + (?: + .*\#(?P<id>\d+)| + (?:[^/]+/)*videos/(?P<display_id>[^/?#]+)| + programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+) + )''' _TESTS = [{ 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', @@ -40,6 +45,14 @@ class DiscoveryNetworksDeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + alternate_id = mobj.group('alternate_id') + if alternate_id: + self._initialize_geo_bypass({ + 'countries': ['DE'], + }) + return self._get_disco_api_info( + url, '%s/%s' % (mobj.group('programme'), alternate_id), + 'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de') brightcove_id = mobj.group('id') if not brightcove_id: title = mobj.group('title') diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 8e03743207..fe47f6dcef 100644 --- 
a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -97,6 +97,75 @@ class DPlayIE(InfoExtractor): 'only_matching': True, }] + def _get_disco_api_info(self, url, display_id, disco_host, realm): + disco_base = 'https://' + disco_host + token = self._download_json( + '%s/token' % disco_base, display_id, 'Downloading token', + query={ + 'realm': realm, + })['data']['attributes']['token'] + headers = { + 'Referer': url, + 'Authorization': 'Bearer ' + token, + } + video = self._download_json( + '%s/content/videos/%s' % (disco_base, display_id), display_id, + headers=headers, query={ + 'include': 'show' + }) + video_id = video['data']['id'] + info = video['data']['attributes'] + title = info['name'] + formats = [] + for format_id, format_dict in self._download_json( + '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), + display_id, headers=headers)['data']['attributes']['streaming'].items(): + if not isinstance(format_dict, dict): + continue + format_url = format_dict.get('url') + if not format_url: + continue + ext = determine_ext(format_url) + if format_id == 'dash' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, display_id, mpd_id='dash', fatal=False)) + elif format_id == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + + series = None + try: + included = video.get('included') + if isinstance(included, list): + show = next(e for e in included if e.get('type') == 'show') + series = try_get( + show, lambda x: x['attributes']['name'], compat_str) + except StopIteration: + pass + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': info.get('description'), + 'duration': float_or_none( + info.get('videoDuration'), scale=1000), + 'timestamp': 
unified_timestamp(info.get('publishStart')), + 'series': series, + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode_number': int_or_none(info.get('episodeNumber')), + 'age_limit': int_or_none(info.get('minimum_age')), + 'formats': formats, + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('id') @@ -113,72 +182,8 @@ def _real_extract(self, url): if not video_id: host = mobj.group('host') - disco_base = 'https://disco-api.%s' % host - self._download_json( - '%s/token' % disco_base, display_id, 'Downloading token', - query={ - 'realm': host.replace('.', ''), - }) - video = self._download_json( - '%s/content/videos/%s' % (disco_base, display_id), display_id, - headers={ - 'Referer': url, - 'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1', - }, query={ - 'include': 'show' - }) - video_id = video['data']['id'] - info = video['data']['attributes'] - title = info['name'] - formats = [] - for format_id, format_dict in self._download_json( - '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), - display_id)['data']['attributes']['streaming'].items(): - if not isinstance(format_dict, dict): - continue - format_url = format_dict.get('url') - if not format_url: - continue - ext = determine_ext(format_url) - if format_id == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, display_id, mpd_id='dash', fatal=False)) - elif format_id == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - series = None - try: - included = video.get('included') - if isinstance(included, list): - show = next(e for e in included if e.get('type') == 'show') - series = try_get( - show, lambda x: x['attributes']['name'], compat_str) - except StopIteration: - pass - - return { - 
'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': info.get('description'), - 'duration': float_or_none( - info.get('videoDuration'), scale=1000), - 'timestamp': unified_timestamp(info.get('publishStart')), - 'series': series, - 'season_number': int_or_none(info.get('seasonNumber')), - 'episode_number': int_or_none(info.get('episodeNumber')), - 'age_limit': int_or_none(info.get('minimum_age')), - 'formats': formats, - } + return self._get_disco_api_info( + url, display_id, 'disco-api.' + host, host.replace('.', '')) info = self._download_json( 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),