From dca4f462742d64ab82e943c7560d0d9cbc5a9305 Mon Sep 17 00:00:00 2001 From: marieell Date: Fri, 11 Feb 2022 18:46:29 +0100 Subject: [PATCH] [cleanup] Remove extractors for some dead websites (#2739) 90tv.ir, HornBunny.com, 220.ro, 5min.com, Kankan.com, Roxwel.com, FreshLive.tv, TheScene.com, Vube.com Authored by: marieell --- supportedsites.md | 9 -- yt_dlp/extractor/engadget.py | 10 -- yt_dlp/extractor/extractors.py | 9 -- yt_dlp/extractor/fivemin.py | 54 ----------- yt_dlp/extractor/freshlive.py | 80 ---------------- yt_dlp/extractor/generic.py | 24 ----- yt_dlp/extractor/hornbunny.py | 49 ---------- yt_dlp/extractor/huffpost.py | 3 - yt_dlp/extractor/ir90tv.py | 42 -------- yt_dlp/extractor/kankan.py | 48 ---------- yt_dlp/extractor/ro220.py | 43 --------- yt_dlp/extractor/roxwel.py | 52 ---------- yt_dlp/extractor/thescene.py | 44 --------- yt_dlp/extractor/vube.py | 170 --------------------------------- 14 files changed, 637 deletions(-) delete mode 100644 yt_dlp/extractor/fivemin.py delete mode 100644 yt_dlp/extractor/freshlive.py delete mode 100644 yt_dlp/extractor/hornbunny.py delete mode 100644 yt_dlp/extractor/ir90tv.py delete mode 100644 yt_dlp/extractor/kankan.py delete mode 100644 yt_dlp/extractor/ro220.py delete mode 100644 yt_dlp/extractor/roxwel.py delete mode 100644 yt_dlp/extractor/thescene.py delete mode 100644 yt_dlp/extractor/vube.py diff --git a/supportedsites.md b/supportedsites.md index 7c4b9bee9a..7166dc53ac 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -3,7 +3,6 @@ # Supported sites - **17live:clip** - **1tv**: Первый канал - **20min** - - **220.ro** - **23video** - **247sports** - **24video** @@ -11,7 +10,6 @@ # Supported sites - **3sat** - **4tube** - **56.com** - - **5min** - **6play** - **7plus** - **8tracks** @@ -381,7 +379,6 @@ # Supported sites - **FranceTVSite** - **Freesound** - **freespeech.org** - - **FreshLive** - **FrontendMasters** - **FrontendMastersCourse** - **FrontendMastersLesson** @@ -454,7 +451,6 @@ # Supported sites - **hitbox:live** - **HitRecord** - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau - - **HornBunny** - **HotNewHipHop** - **hotstar** - **hotstar:playlist** @@ -499,7 +495,6 @@ # Supported sites - **iq.com**: International version of iQiyi - **iq.com:album** - **iqiyi**: 爱奇艺 - - **Ir90Tv** - **ITTF** - **ITV** - **ITVBTCC** @@ -516,7 +511,6 @@ # Supported sites - **JWPlatform** - **Kakao** - **Kaltura** - - **Kankan** - **Karaoketv** - **KarriereVideos** - **Katsomo** @@ -989,7 +983,6 @@ # Supported sites - **RoosterTeeth** - **RoosterTeethSeries** - **RottenTomatoes** - - **Roxwel** - **Rozhlas** - **RTBF** - **RTDocumentry** @@ -1181,7 +1174,6 @@ # Supported sites - **TheIntercept** - **ThePlatform** - **ThePlatformFeed** - - **TheScene** - **TheStar** - **TheSun** - **ThetaStream** @@ -1388,7 +1380,6 @@ # Supported sites - **VShare** - **VTM** - **VTXTV** - - **vube**: Vube.com - **VuClip** - **Vupload** - **VVVVID** diff --git a/yt_dlp/extractor/engadget.py b/yt_dlp/extractor/engadget.py index 65635c18b7..733bf322fd 100644 --- a/yt_dlp/extractor/engadget.py +++ b/yt_dlp/extractor/engadget.py @@ -7,16 +7,6 @@ class EngadgetIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P[^/?#]+)' _TESTS = [{ - # video with 5min ID - 'url': 'http://www.engadget.com/video/518153925/', - 'md5': 'c6820d4828a5064447a4d9fc73f312c9', - 'info_dict': { - 'id': '518153925', - 'ext': 'mp4', - 'title': 'Samsung Galaxy Tab Pro 8.4 Review', - }, - 'add_ie': ['FiveMin'], - }, { # video with vidible ID 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', 'only_matching': True, diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bd514f958d..6a9f9250a6 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -481,7 +481,6 @@ ) from .filmweb import FilmwebIE from .firsttv import FirstTVIE -from .fivemin import FiveMinIE from .fivetv import FiveTVIE from .flickr import FlickrIE from .folketinget import FolketingetIE @@ -513,7 +512,6 @@ ) from .freesound import FreesoundIE from .freespeech import FreespeechIE -from .freshlive import FreshLiveIE from .frontendmasters import ( FrontendMastersIE, FrontendMastersLessonIE, @@ -585,7 +583,6 @@ from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE -from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, @@ -655,7 +652,6 @@ IqIE, IqAlbumIE ) -from .ir90tv import Ir90TvIE from .itv import ( ITVIE, ITVBTCCIE, @@ -677,7 +673,6 @@ from .jwplatform import JWPlatformIE from .kakao import KakaoIE from .kaltura import KalturaIE -from .kankan import KankanIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE @@ -1320,11 +1315,9 @@ from .reverbnation import ReverbNationIE from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE -from .ro220 import Ro220IE from .rockstargames import RockstarGamesIE from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE -from .roxwel import RoxwelIE from .rozhlas import RozhlasIE from .rtbf import RTBFIE from .rte import RteIE, RteRadioIE @@ -1595,7 +1588,6 @@ ThePlatformIE, ThePlatformFeedIE, ) -from .thescene import TheSceneIE from .thestar import TheStarIE from .thesun import TheSunIE from .theta import ( @@ -1900,7 +1892,6 @@ from .vshare import VShareIE from .vtm import VTMIE from .medialaan import MedialaanIE -from .vube import VubeIE from .vuclip import VuClipIE from .vupload import VuploadIE from .vvvvid import ( diff --git a/yt_dlp/extractor/fivemin.py b/yt_dlp/extractor/fivemin.py deleted file mode 100644 index f3f876ecda..0000000000 --- a/yt_dlp/extractor/fivemin.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class FiveMinIE(InfoExtractor): - IE_NAME = '5min' - _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P\d+)' - - _TESTS = [ - { - # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/ - 'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791', - 'md5': '4f7b0b79bf1a470e5004f7112385941d', - 'info_dict': { - 'id': '518013791', - 'ext': 'mp4', - 'title': 'iPad Mini with Retina Display Review', - 'description': 'iPad mini with Retina Display review', - 'duration': 177, - 'uploader': 'engadget', - 'upload_date': '20131115', - 'timestamp': 1384515288, - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, - { - # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247 - 'url': '5min:518086247', - 'md5': 'e539a9dd682c288ef5a498898009f69e', - 'info_dict': { - 'id': '518086247', - 'ext': 'mp4', - 'title': 'How to Make a Next-Level Fruit Salad', - 'duration': 184, - }, - 'skip': 'no longer available', - }, - { - 'url': 'http://embed.5min.com/518726732/', - 'only_matching': True, - }, - { - 'url': 'http://delivery.vidible.tv/aol?playList=518013791', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result('aol-video:%s' % video_id) diff --git a/yt_dlp/extractor/freshlive.py b/yt_dlp/extractor/freshlive.py deleted file mode 100644 index ad19b81094..0000000000 --- a/yt_dlp/extractor/freshlive.py +++ /dev/null @@ -1,80 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - int_or_none, - try_get, - unified_timestamp, -) - - -class FreshLiveIE(InfoExtractor): - _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P\d+)' - _TEST = { - 'url': 'https://freshlive.tv/satotv/74712', - 'md5': '9f0cf5516979c4454ce982df3d97f352', - 'info_dict': { - 'id': '74712', - 'ext': 'mp4', - 'title': 'テスト', - 'description': 'テスト', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1511, - 'timestamp': 1483619655, - 'upload_date': '20170105', - 'uploader': 'サトTV', - 'uploader_id': 'satotv', - 'view_count': int, - 'comment_count': int, - 'is_live': False, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - options = self._parse_json( - self._search_regex( - r'window\.__CONTEXT__\s*=\s*({.+?});\s*', - webpage, 'initial context'), - video_id) - - info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id] - - title = info['title'] - - if info.get('status') == 'upcoming': - raise ExtractorError('Stream %s is upcoming' % video_id, expected=True) - - stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl'] - - is_live = info.get('liveStreamUrl') is not None - - formats = self._extract_m3u8_formats( - stream_url, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls') - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': info.get('description'), - 'thumbnail': info.get('thumbnailUrl'), - 'duration': int_or_none(info.get('airTime')), - 'timestamp': unified_timestamp(info.get('createdAt')), - 'uploader': try_get( - info, lambda x: x['channel']['title'], compat_str), - 'uploader_id': try_get( - info, lambda x: x['channel']['code'], compat_str), - 'uploader_url': try_get( - info, lambda x: x['channel']['permalink'], compat_str), - 'view_count': int_or_none(info.get('viewCount')), - 'comment_count': int_or_none(info.get('commentCount')), - 'tags': info.get('tags', []), - 'is_live': is_live, - } diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2b59d076f8..f723d1e8c3 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1456,24 +1456,6 @@ class GenericIE(InfoExtractor): 'duration': 45.115, }, }, - # 5min embed - { - 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/', - 'md5': '4c6f127a30736b59b3e2c19234ee2bf7', - 'info_dict': { - 'id': '518726732', - 'ext': 'mp4', - 'title': 'Facebook Creates "On This Day" | Crunch Report', - 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild', - 'timestamp': 1427237531, - 'uploader': 'Crunch Report', - 'upload_date': '20150324', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, # Crooks and Liars embed { 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', @@ -3337,12 +3319,6 @@ def _real_extract(self, url): if mobj is not None: return self.url_result(mobj.group('url')) - # Look for 5min embeds - mobj = re.search( - r']+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P[0-9]+)/?', webpage) - if mobj is not None: - return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') - # Look for Crooks and Liars embeds mobj = re.search( r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage) diff --git a/yt_dlp/extractor/hornbunny.py b/yt_dlp/extractor/hornbunny.py deleted file mode 100644 index c458a959d9..0000000000 --- a/yt_dlp/extractor/hornbunny.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, -) - - -class HornBunnyIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P[a-z-]+)-(?P\d+)\.html' - _TEST = { - 'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html', - 'md5': 'e20fd862d1894b67564c96f180f43924', - 'info_dict': { - 'id': '5227', - 'ext': 'mp4', - 'title': 'panty slut jerk off instruction', - 'duration': 550, - 'age_limit': 18, - 'view_count': int, - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] - - duration = parse_duration(self._search_regex( - r'Runtime:\s*([0-9:]+)', - webpage, 'duration', fatal=False)) - view_count = int_or_none(self._search_regex( - r'Views:\s*(\d+)', - webpage, 'view count', fatal=False)) - - info_dict.update({ - 'id': video_id, - 'title': title, - 'duration': duration, - 'view_count': view_count, - 'age_limit': 18, - }) - - return info_dict diff --git a/yt_dlp/extractor/huffpost.py b/yt_dlp/extractor/huffpost.py index 97e36f0568..54385bafa2 100644 --- a/yt_dlp/extractor/huffpost.py +++ b/yt_dlp/extractor/huffpost.py @@ -80,9 +80,6 @@ def _real_extract(self, url): 'vcodec': 'none' if key.startswith('audio/') else None, }) - if not formats and data.get('fivemin_id'): - return self.url_result('5min:%s' % data['fivemin_id']) - self._sort_formats(formats) return { diff --git a/yt_dlp/extractor/ir90tv.py b/yt_dlp/extractor/ir90tv.py deleted file mode 100644 index d5a3f6fa5d..0000000000 --- a/yt_dlp/extractor/ir90tv.py +++ /dev/null @@ -1,42 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import remove_start - - -class Ir90TvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P[0-9]+)/.*' - _TESTS = [{ - 'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218', - 'md5': '411dbd94891381960cb9e13daa47a869', - 'info_dict': { - 'id': '95719', - 'ext': 'mp4', - 'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18', - 'thumbnail': r're:^https?://.*\.jpg$', - } - }, { - 'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = remove_start(self._html_search_regex( - r'([^<]+)', webpage, 'title'), '90tv.ir :: ') - - video_url = self._search_regex( - r']+src="([^"]+)"', webpage, 'video url') - - thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False) - - return { - 'url': video_url, - 'id': video_id, - 'title': title, - 'video_url': video_url, - 'thumbnail': thumbnail, - } diff --git a/yt_dlp/extractor/kankan.py b/yt_dlp/extractor/kankan.py deleted file mode 100644 index a677ff4471..0000000000 --- a/yt_dlp/extractor/kankan.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import unicode_literals - -import re -import hashlib - -from .common import InfoExtractor - -_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() - - -class KankanIE(InfoExtractor): - _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P\d+)\.shtml' - - _TEST = { - 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', - 'md5': '29aca1e47ae68fc28804aca89f29507e', - 'info_dict': { - 'id': '48863', - 'ext': 'flv', - 'title': 'Ready To Go', - }, - 'skip': 'Only available from China', - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title') - surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0) - gcids = re.findall(r'http://.+?/.+?/(.+?)/', surls) - gcid = gcids[-1] - - info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid - video_info_page = self._download_webpage( - info_url, video_id, 'Downloading video url info') - ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip') - path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path') - param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1') - param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2') - key = _md5('xl_mp43651' + param1 + param2) - video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2) - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - } diff --git a/yt_dlp/extractor/ro220.py b/yt_dlp/extractor/ro220.py deleted file mode 100644 index 69934ef2b0..0000000000 --- a/yt_dlp/extractor/ro220.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote - - -class Ro220IE(InfoExtractor): - IE_NAME = '220.ro' - _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)' - _TEST = { - 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/', - 'md5': '03af18b73a07b4088753930db7a34add', - 'info_dict': { - 'id': 'LYV6doKo7f', - 'ext': 'mp4', - 'title': 'Luati-le Banii sez 4 ep 1', - 'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - url = compat_urllib_parse_unquote(self._search_regex( - r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url')) - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - - formats = [{ - 'format_id': 'sd', - 'url': url, - 'ext': 'mp4', - }] - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - } diff --git a/yt_dlp/extractor/roxwel.py b/yt_dlp/extractor/roxwel.py deleted file mode 100644 index 84bb1aa00b..0000000000 --- a/yt_dlp/extractor/roxwel.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import unicode_literals - - -from .common import InfoExtractor -from ..utils import unified_strdate, determine_ext - - -class RoxwelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P.+?)(\.|\?|$)' - - _TEST = { - 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', - 'info_dict': { - 'id': 'passionpittakeawalklive', - 'ext': 'flv', - 'title': 'Take A Walk (live)', - 'uploader': 'Passion Pit', - 'uploader_id': 'passionpit', - 'upload_date': '20120928', - 'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - filename = mobj.group('filename') - info_url = 'http://www.roxwel.com/api/videos/%s' % filename - info = self._download_json(info_url, filename) - - rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) - best_rate = rtmp_rates[-1] - url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) - rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url') - ext = determine_ext(rtmp_url) - if ext == 'f4v': - rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) - - return { - 'id': filename, - 'title': info['title'], - 'url': rtmp_url, - 'ext': 'flv', - 'description': info['description'], - 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), - 'uploader': info['artist'], - 'uploader_id': info['artistname'], - 'upload_date': unified_strdate(info['dbdate']), - } diff --git a/yt_dlp/extractor/thescene.py b/yt_dlp/extractor/thescene.py deleted file mode 100644 index cd642355c9..0000000000 --- a/yt_dlp/extractor/thescene.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - -from ..compat import compat_urlparse - - -class TheSceneIE(InfoExtractor): - _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P[^/#?]+)' - - _TEST = { - 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', - 'info_dict': { - 'id': '520e8faac2b4c00e3c6e5f43', - 'ext': 'mp4', - 'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear', - 'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear', - 'duration': 127, - 'series': 'Style.com Fashion Shows', - 'season': 'Ready To Wear Spring 2013', - 'tags': list, - 'categories': list, - 'upload_date': '20120913', - 'timestamp': 1347512400, - 'uploader': 'vogue', - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - player_url = compat_urlparse.urljoin( - url, - self._html_search_regex( - r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url')) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'url': player_url, - 'ie_key': 'CondeNast', - } diff --git a/yt_dlp/extractor/vube.py b/yt_dlp/extractor/vube.py deleted file mode 100644 index 1c8f80ae9f..0000000000 --- a/yt_dlp/extractor/vube.py +++ /dev/null @@ -1,170 +0,0 @@ -from __future__ import unicode_literals - - -from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - int_or_none, -) - - -class VubeIE(InfoExtractor): - IE_NAME = 'vube' - IE_DESC = 'Vube.com' - _VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P[\da-zA-Z]{10})\b' - - _TESTS = [ - { - 'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s', - 'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42', - 'info_dict': { - 'id': 'Y8NUZ69Tf7', - 'ext': 'mp4', - 'title': 'Best Drummer Ever [HD]', - 'description': 'md5:2d63c4b277b85c2277761c2cf7337d71', - 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'William', - 'timestamp': 1406876915, - 'upload_date': '20140801', - 'duration': 258.051, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'], - }, - 'skip': 'Not accessible from Travis CI server', - }, { - 'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', - 'md5': 'db7aba89d4603dadd627e9d1973946fe', - 'info_dict': { - 'id': 'YL2qNPkqon', - 'ext': 'mp4', - 'title': 'Chiara Grispo - Price Tag by Jessie J', - 'description': 'md5:8ea652a1f36818352428cb5134933313', - 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$', - 'uploader': 'Chiara.Grispo', - 'timestamp': 1388743358, - 'upload_date': '20140103', - 'duration': 170.56, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'], - }, - 'skip': 'Removed due to DMCA', - }, - { - 'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1', - 'md5': '5d4a52492d76f72712117ce6b0d98d08', - 'info_dict': { - 'id': 'UeBhTudbfS', - 'ext': 'mp4', - 'title': 'My 7 year old Sister and I singing "Alive" by Krewella', - 'description': 'md5:40bcacb97796339f1690642c21d56f4a', - 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$', - 'uploader': 'Seraina', - 'timestamp': 1396492438, - 'upload_date': '20140403', - 'duration': 240.107, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'categories': ['seraina', 'jessica', 'krewella', 'alive'], - }, - 'skip': 'Removed due to DMCA', - }, { - 'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', - 'md5': '0584fc13b50f887127d9d1007589d27f', - 'info_dict': { - 'id': '0nmsMY5vEq', - 'ext': 'mp4', - 'title': 'Frozen - Let It Go Cover by Siren Gene', - 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.', - 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$', - 'uploader': 'Siren', - 'timestamp': 1395448018, - 'upload_date': '20140322', - 'duration': 221.788, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'], - }, - 'skip': 'Removed due to DMCA', - } - ] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - video = self._download_json( - 'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON') - - public_id = video['public_id'] - - formats = [] - - for media in video['media'].get('video', []) + video['media'].get('audio', []): - if media['transcoding_status'] != 'processed': - continue - fmt = { - 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id), - 'abr': int(media['audio_bitrate']), - 'format_id': compat_str(media['media_resolution_id']), - } - vbr = int(media['video_bitrate']) - if vbr: - fmt.update({ - 'vbr': vbr, - 'height': int(media['height']), - }) - formats.append(fmt) - - if not formats and video.get('vst') == 'dmca': - self.raise_no_formats( - 'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.', - expected=True) - - self._sort_formats(formats) - - title = video['title'] - description = video.get('description') - thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:') - uploader = video.get('user_alias') or video.get('channel') - timestamp = int_or_none(video.get('upload_time')) - duration = video['duration'] - view_count = video.get('raw_view_count') - like_count = video.get('total_likes') - dislike_count = video.get('total_hates') - - comments = video.get('comments') - comment_count = None - if comments is None: - comment_data = self._download_json( - 'http://vube.com/api/video/%s/comment' % video_id, - video_id, 'Downloading video comment JSON', fatal=False) - if comment_data is not None: - comment_count = int_or_none(comment_data.get('total')) - else: - comment_count = len(comments) - - categories = [tag['text'] for tag in video['tags']] - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'comment_count': comment_count, - 'categories': categories, - }