From 334f41e0d827a361b4b8d39f55b66221281d92ae Mon Sep 17 00:00:00 2001 From: "Jeremie J. Jarosh" Date: Sat, 15 Apr 2017 10:12:08 -0500 Subject: [PATCH] [go90] Improve extraction - add metadata for 'series', 'episode', 'season', 'season_id', 'season_number', and 'episode_number' - integrate series title into the title - extract subtitles (fall back to `vtt` if the subtitle file type detection fails, as that is the most likely extension) --- youtube_dl/extractor/go90.py | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py index 3550eca7cb..9b2e1c1645 100644 --- a/youtube_dl/extractor/go90.py +++ b/youtube_dl/extractor/go90.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, parse_iso8601, ) @@ -18,7 +19,7 @@ class Go90IE(InfoExtractor): 'info_dict': { 'id': '84BUqjLpf9D', 'ext': 'mp4', - 'title': 'Inside The Utah Coalition Against Pornography Convention', + 'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention', 'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. 
Then, VICE Sports explains NFL contracts.', 'timestamp': 1491868800, 'upload_date': '20170411', @@ -32,11 +33,28 @@ def _real_extract(self, url): video_id, headers={ 'Content-Type': 'application/json; charset=utf-8', }, data=b'{"client":"web","device_type":"pc"}') - title = video_data['title'] main_video_asset = video_data['main_video_asset'] + episode_number = int_or_none(video_data.get('episode_number')) + series = None + season = None + season_id = None + season_number = None + for metadata in video_data.get('__children', {}).get('Item', {}).values(): + if metadata.get('type') == 'show': + series = metadata.get('title') + elif metadata.get('type') == 'season': + season = metadata.get('title') + season_id = metadata.get('id') + season_number = int_or_none(metadata.get('season_number')) + + title = episode = video_data.get('title') or series + if series and series != title: + title = '%s - %s' % (series, title) + thumbnails = [] formats = [] + subtitles = {} for asset in video_data.get('assets'): if asset.get('id') == main_video_asset: for source in asset.get('sources', []): @@ -70,6 +88,15 @@ def _real_extract(self, url): 'height': int_or_none(source.get('height')), 'tbr': int_or_none(source.get('bitrate')), }) + + for caption in asset.get('caption_metadata', []): + caption_url = caption.get('source_url') + if not caption_url: + continue + subtitles.setdefault(caption.get('language', 'en'), []).append({ + 'url': caption_url, + 'ext': determine_ext(caption_url, 'vtt'), + }) elif asset.get('type') == 'image': asset_location = asset.get('location') if not asset_location: @@ -89,4 +116,11 @@ def _real_extract(self, url): 'description': video_data.get('short_description'), 'like_count': int_or_none(video_data.get('like_count')), 'timestamp': parse_iso8601(video_data.get('released_at')), + 'series': series, + 'episode': episode, + 'season': season, + 'season_id': season_id, + 'season_number': season_number, + 'episode_number': episode_number, + 'subtitles': subtitles, }