[wistia] improve format extraction and extract subtitles (closes #22590)

This commit is contained in:
Remita Amine 2020-01-05 21:08:50 +01:00
parent 259ad38173
commit 233826f68f

View File

@ -13,8 +13,7 @@
class WistiaIE(InfoExtractor): class WistiaIE(InfoExtractor):
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})' _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
_TESTS = [{ _TESTS = [{
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
@ -67,10 +66,10 @@ def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data_json = self._download_json( data_json = self._download_json(
self._API_URL % video_id, video_id, self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
# Some videos require this. # Some videos require this.
headers={ headers={
'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, 'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
}) })
if data_json.get('error'): if data_json.get('error'):
@ -95,27 +94,61 @@ def _real_extract(self, url):
'url': aurl, 'url': aurl,
'width': int_or_none(a.get('width')), 'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')), 'height': int_or_none(a.get('height')),
'filesize': int_or_none(a.get('size')),
}) })
else: else:
aext = a.get('ext') aext = a.get('ext')
is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' display_name = a.get('display_name')
formats.append({ format_id = atype
'format_id': atype, if atype and atype.endswith('_video') and display_name:
format_id = '%s-%s' % (atype[:-6], display_name)
f = {
'format_id': format_id,
'url': aurl, 'url': aurl,
'tbr': int_or_none(a.get('bitrate')), 'tbr': int_or_none(a.get('bitrate')) or None,
'vbr': int_or_none(a.get('opt_vbitrate')), 'preference': 1 if atype == 'original' else None,
}
if display_name == 'Audio':
f.update({
'vcodec': 'none',
})
else:
f.update({
'width': int_or_none(a.get('width')), 'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')), 'height': int_or_none(a.get('height')),
'filesize': int_or_none(a.get('size')),
'vcodec': a.get('codec'), 'vcodec': a.get('codec'),
'container': a.get('container'),
'ext': 'mp4' if is_m3u8 else aext,
'protocol': 'm3u8' if is_m3u8 else None,
'preference': 1 if atype == 'original' else None,
}) })
if a.get('container') == 'm3u8' or aext == 'm3u8':
ts_f = f.copy()
ts_f.update({
'ext': 'ts',
'format_id': f['format_id'].replace('hls-', 'ts-'),
'url': f['url'].replace('.bin', '.ts'),
})
formats.append(ts_f)
f.update({
'ext': 'mp4',
'protocol': 'm3u8_native',
})
else:
f.update({
'container': a.get('container'),
'ext': aext,
'filesize': int_or_none(a.get('size')),
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
for caption in data.get('captions', []):
language = caption.get('language')
if not language:
continue
subtitles[language] = [{
'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
}]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -124,4 +157,5 @@ def _real_extract(self, url):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(data.get('duration')), 'duration': float_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('createdAt')), 'timestamp': int_or_none(data.get('createdAt')),
'subtitles': subtitles,
} }