From 27b83249c90fa0500bff68b5aee7874100bba5e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 20 Jan 2016 20:00:51 +0600 Subject: [PATCH] [tube8] Fix extraction and extract all formats (Closes #8281) --- youtube_dl/extractor/tube8.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 46ef61ff5..6d7ca4874 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse +from ..compat import compat_str from ..utils import ( int_or_none, sanitized_Request, @@ -44,14 +43,28 @@ def _real_extract(self, url): req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, display_id) - flashvars = json.loads(self._html_search_regex( - r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars')) + flashvars = self._parse_json( + self._search_regex( + r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'), + video_id) - video_url = flashvars['video_url'] - if flashvars.get('encrypted') is True: - video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8') - path = compat_urllib_parse_urlparse(video_url).path - format_id = '-'.join(path.split('/')[4].split('_')[:2]) + formats = [] + for key, video_url in flashvars.items(): + if not isinstance(video_url, compat_str) or not video_url.startswith('http'): + continue + height = self._search_regex( + r'quality_(\d+)[pP]', key, 'height', default=None) + if not height: + continue + if flashvars.get('encrypted') is True: + video_url = aes_decrypt_text( + video_url, flashvars['video_title'], 32).decode('utf-8') + formats.append({ + 'url': video_url, + 'format_id': '%sp' % height, + 'height': int(height), + }) + self._sort_formats(formats) thumbnail = flashvars.get('image_url') @@ -79,15 +92,14 @@ def _real_extract(self, url): return { 'id': video_id, 'display_id': display_id, - 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, - 'format_id': format_id, 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, 'age_limit': 18, + 'formats': formats, }