From dfd14aadfa4546bac7883032d6fcba27d333e7c8 Mon Sep 17 00:00:00 2001 From: Samik Some Date: Fri, 7 Feb 2020 18:27:32 +0530 Subject: [PATCH] [roosterteeth.com] Add subtitle support (https://github.com/ytdl-org/youtube-dl/pull/23985) Closes #15 Authored by samiksome --- youtube_dlc/extractor/roosterteeth.py | 34 +++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/roosterteeth.py b/youtube_dlc/extractor/roosterteeth.py index 8883639b29..1ad6226b68 100644 --- a/youtube_dlc/extractor/roosterteeth.py +++ b/youtube_dlc/extractor/roosterteeth.py @@ -86,9 +86,11 @@ def _real_extract(self, url): api_episode_url = self._EPISODE_BASE_URL + display_id try: - m3u8_url = self._download_json( + video_data = self._download_json( api_episode_url + '/videos', display_id, - 'Downloading video JSON metadata')['data'][0]['attributes']['url'] + 'Downloading video JSON metadata')['data'][0] + m3u8_url = video_data['attributes']['url'] + subtitle_m3u8_url = video_data['links']['download'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: @@ -119,6 +121,33 @@ def _real_extract(self, url): 'url': img_url, }) + subtitles = {} + res = self._download_webpage_handle( + subtitle_m3u8_url, display_id, + 'Downloading m3u8 information', + 'Failed to download m3u8 information', + fatal=True, data=None, headers={}, query={}) + if res is not False: + subtitle_m3u8_doc, _ = res + for line in subtitle_m3u8_doc.split('\n'): + if 'EXT-X-MEDIA:TYPE=SUBTITLES' in line: + parts = line.split(',') + for part in parts: + if 'LANGUAGE' in part: + lang = part[part.index('=') + 2:-1] + elif 'URI' in part: + uri = part[part.index('=') + 2:-1] + res = self._download_webpage_handle( + uri, display_id, + 'Downloading m3u8 information', + 'Failed to download m3u8 information', + fatal=True, data=None, headers={}, query={}) + doc, _ = res + for l in doc.split('\n'): + if not l.startswith('#'): + subtitles[lang] = [{'url': uri[:-uri[::-1].index('/')] + l}] + break + return { 'id': video_id, 'display_id': display_id, @@ -134,4 +163,5 @@ def _real_extract(self, url): 'formats': formats, 'channel_id': attributes.get('channel_id'), 'duration': int_or_none(attributes.get('length')), + 'subtitles': subtitles }