diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index de6e8ee30f..4a1bcfa354 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -315,6 +315,7 @@
 from .spiegel import SpiegelIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
+from .sportdeutschland import SportDeutschlandIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 69d5f687cb..9ece303080 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -636,6 +636,69 @@ def _extract_f4m_formats(self, manifest_url, video_id):
 
         return formats
 
+    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
+        """Build a format list from an HLS (m3u8) master playlist.
+
+        Downloads m3u8_url and adds one format per URI line, filled from the
+        attributes of the preceding #EXT-X-STREAM-INF tag (BANDWIDTH,
+        CODECS, RESOLUTION).  The playlist URL itself is always listed as
+        the 'm3u8-meta' format; ext is copied into every format.  Returns
+        the list after it has been passed to self._sort_formats().
+        """
+        formats = [{
+            'format_id': 'm3u8-meta',
+            'url': m3u8_url,
+            'ext': ext,
+            'protocol': 'm3u8',
+            'preference': -1,
+            'resolution': 'multiple',
+            'format_note': 'Quality selection URL',
+        }]
+
+        m3u8_doc = self._download_webpage(m3u8_url, video_id)
+        # Start with an empty attribute dict so that a URI line appearing
+        # before any #EXT-X-STREAM-INF tag does not crash on None.get()
+        last_info = {}
+        kv_rex = re.compile(
+            r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
+        for line in m3u8_doc.splitlines():
+            if line.startswith('#EXT-X-STREAM-INF:'):
+                last_info = {}
+                for m in kv_rex.finditer(line):
+                    v = m.group('val')
+                    if v.startswith('"'):
+                        v = v[1:-1]
+                    last_info[m.group('key')] = v
+            elif line.startswith('#') or not line.strip():
+                continue
+            else:
+                tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+
+                f = {
+                    'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+                    'url': line.strip(),
+                    'tbr': tbr,
+                    'ext': ext,
+                }
+                codecs = last_info.get('CODECS')
+                if codecs:
+                    # CODECS may hold one entry or more than two; do not
+                    # assume an exact "video,audio" pair when unpacking
+                    va_codecs = codecs.split(',')
+                    if va_codecs[0]:
+                        f['vcodec'] = va_codecs[0].partition('.')[0]
+                    if len(va_codecs) > 1 and va_codecs[1]:
+                        f['acodec'] = va_codecs[1].partition('.')[0]
+                resolution = last_info.get('RESOLUTION')
+                if resolution:
+                    width_str, height_str = resolution.split('x')
+                    f['width'] = int(width_str)
+                    f['height'] = int(height_str)
+                formats.append(f)
+                last_info = {}
+        self._sort_formats(formats)
+        return formats
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
new file mode 100644
index 0000000000..9d54043e5b
--- /dev/null
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    parse_iso8601,
+)
+
+
+class SportDeutschlandIE(InfoExtractor):
+    """Extractor for sportdeutschland.tv video pages."""
+
+    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+    _TEST = {
+        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+        'info_dict': {
+            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+            'ext': 'mp4',
+            'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
+            'categories': ['Badminton'],
+            'view_count': int,
+            'thumbnail': 're:^https?://.*\.jpg',
+            'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
+            'timestamp': 1409043600,
+            'upload_date': '20140826',
+        },
+        'params': {
+            'skip_download': 'Live stream',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Query the splink.tv permalink API and build the info dict.
+
+        Formats come from the HLS playlist (the SMIL URL with '.smil'
+        replaced by '.m3u8') plus one RTMP format per SMIL video node.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sport_id = mobj.group('sport')
+
+        api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
+            sport_id, video_id)
+        req = compat_urllib_request.Request(api_url, headers={
+            'Accept': 'application/vnd.vidibus.v2.html+json',
+            'Referer': url,
+        })
+        data = self._download_json(req, video_id)
+
+        categories = list(data.get('section', {}).get('tags', {}).values())
+        asset = data['asset']
+
+        smil_url = asset['video']
+        m3u8_url = smil_url.replace('.smil', '.m3u8')
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+
+        smil_doc = self._download_xml(
+            smil_url, video_id, note='Downloading SMIL metadata')
+        base_url = smil_doc.find('./head/meta').attrib['base']
+        formats.extend([{
+            'format_id': 'rtmp',
+            'url': base_url,
+            'play_path': n.attrib['src'],
+            'ext': 'flv',
+            'preference': -100,
+            'format_note': 'Seems to fail at example stream',
+        } for n in smil_doc.findall('./body/video')])
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': asset['title'],
+            'thumbnail': asset.get('image'),
+            'description': asset.get('teaser'),
+            'categories': categories,
+            'view_count': asset.get('views'),
+            'rtmp_live': asset['live'],
+            'timestamp': parse_iso8601(asset.get('date')),
+        }
+