mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 06:10:12 +01:00
[go,viu] Extract subtitles from the m3u8 manifest (#3219)
Authored by: fstirlitz
This commit is contained in:
parent
4628a3aa75
commit
47b8bf207b
|
@ -217,6 +217,7 @@ def _real_extract(self, url):
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
for asset in video_data.get('assets', {}).get('asset', []):
|
for asset in video_data.get('assets', {}).get('asset', []):
|
||||||
asset_url = asset.get('value')
|
asset_url = asset.get('value')
|
||||||
if not asset_url:
|
if not asset_url:
|
||||||
|
@ -256,8 +257,10 @@ def _real_extract(self, url):
|
||||||
error_message = ', '.join([error['message'] for error in errors])
|
error_message = ', '.join([error['message'] for error in errors])
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||||
formats.extend(self._extract_m3u8_formats(
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
|
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
else:
|
else:
|
||||||
f = {
|
f = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@ -281,7 +284,6 @@ def _real_extract(self, url):
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
for cc in video_data.get('closedcaption', {}).get('src', []):
|
for cc in video_data.get('closedcaption', {}).get('src', []):
|
||||||
cc_url = cc.get('value')
|
cc_url = cc.get('value')
|
||||||
if not cc_url:
|
if not cc_url:
|
||||||
|
|
|
@ -88,10 +88,9 @@ def _real_extract(self, url):
|
||||||
# r'(/hlsc_)[a-z]+(\d+\.m3u8)',
|
# r'(/hlsc_)[a-z]+(\d+\.m3u8)',
|
||||||
# r'\1whe\2', video_data['href'])
|
# r'\1whe\2', video_data['href'])
|
||||||
m3u8_url = video_data['href']
|
m3u8_url = video_data['href']
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
for key, value in video_data.items():
|
for key, value in video_data.items():
|
||||||
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user