diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ad8c7d661..9b96bd5b4 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -77,7 +77,6 @@ WyborczaPodcastIE, WyborczaVideoIE, ) -from .airmozilla import AirMozillaIE from .airtv import AirTVIE from .aitube import AitubeKZVideoIE from .aljazeera import AlJazeeraIE @@ -147,7 +146,6 @@ from .arnes import ArnesIE from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE -from .atttechchannel import ATTTechChannelIE from .atvat import ATVAtIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE @@ -243,11 +241,6 @@ BitChuteIE, BitChuteChannelIE, ) -from .bitwave import ( - BitwaveReplayIE, - BitwaveStreamIE, -) -from .biqle import BIQLEIE from .blackboardcollaborate import BlackboardCollaborateIE from .bleacherreport import ( BleacherReportIE, @@ -262,10 +255,7 @@ from .box import BoxIE from .boxcast import BoxCastVideoIE from .bpb import BpbIE -from .br import ( - BRIE, - BRMediathekIE, -) +from .br import BRIE from .bravotv import BravoTVIE from .brainpop import ( BrainPOPIE, @@ -275,7 +265,6 @@ BrainPOPFrIE, BrainPOPIlIE, ) -from .breakcom import BreakIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, @@ -305,16 +294,11 @@ from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE -from .camwithher import CamWithHerIE from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .caracoltv import CaracolTvPlayIE -from .carambatv import ( - CarambaTVIE, - CarambaTVPageIE, -) from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, @@ -353,7 +337,6 @@ from .cellebrite import CellebriteIE from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE -from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import 
ChaturbateIE from .chilloutzone import ChilloutzoneIE @@ -361,11 +344,6 @@ ChingariIE, ChingariUserIE, ) -from .chirbit import ( - ChirbitIE, - ChirbitProfileIE, -) -from .cinchcast import CinchcastIE from .cinemax import CinemaxIE from .cinetecamilano import CinetecaMilanoIE from .cineverse import ( @@ -382,10 +360,8 @@ from .cliphunter import CliphunterIE from .clippit import ClippitIE from .cliprs import ClipRsIE -from .clipsyndicate import ClipsyndicateIE from .closertotruth import CloserToTruthIE from .cloudflarestream import CloudflareStreamIE -from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE @@ -452,7 +428,6 @@ DacastVODIE, DacastPlaylistIE, ) -from .daftsex import DaftsexIE from .dailymail import DailyMailIE from .dailymotion import ( DailymotionIE, @@ -489,7 +464,6 @@ from .dfb import DFBIE from .dhm import DHMIE from .digg import DiggIE -from .dotsub import DotsubIE from .douyutv import ( DouyuShowIE, DouyuTVIE, @@ -536,7 +510,6 @@ DubokuPlaylistIE ) from .dumpert import DumpertIE -from .defense import DefenseGouvFrIE from .deuxm import ( DeuxMIE, DeuxMNewsIE @@ -559,26 +532,17 @@ from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE -from .echomsk import EchoMskIE from .egghead import ( EggheadCourseIE, EggheadLessonIE, ) -from .ehow import EHowIE from .eighttracks import EightTracksIE from .einthusan import EinthusanIE from .eitb import EitbIE -from .elevensports import ElevenSportsIE -from .ellentube import ( - EllenTubeIE, - EllenTubeVideoIE, - EllenTubePlaylistIE, -) from .elonet import ElonetIE from .elpais import ElPaisIE from .eltrecetv import ElTreceTVIE from .embedly import EmbedlyIE -from .engadget import EngadgetIE from .epicon import ( EpiconIE, EpiconSeriesIE, @@ -596,7 +560,6 @@ ERTFlixIE, ERTWebtvEmbedIE, ) -from .escapist import EscapistIE from .espn import ( ESPNIE, WatchESPNIE, @@ -604,15 +567,12 @@ 
FiveThirtyEightIE, ESPNCricInfoIE, ) -from .esri import EsriVideoIE from .ettutv import EttuTvIE from .europa import EuropaIE, EuroParlWebstreamIE from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE -from .expotv import ExpoTVIE from .expressen import ExpressenIE -from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE from .facebook import ( FacebookIE, @@ -655,16 +615,11 @@ PornerBrosIE, FuxIE, ) -from .fourzerostudio import ( - FourZeroStudioArchiveIE, - FourZeroStudioClipIE, -) from .fox import FOXIE from .fox9 import ( FOX9IE, FOX9NewsIE, ) -from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, FoxNewsArticleIE, @@ -697,7 +652,6 @@ ) from .funk import FunkIE from .funker530 import Funker530IE -from .fusion import FusionIE from .fuyintv import FuyinTVIE from .gab import ( GabTVIE, @@ -728,7 +682,6 @@ GettrIE, GettrStreamingIE, ) -from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE @@ -774,12 +727,10 @@ from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE -from .helsinki import HelsinkiIE from .hgtv import HGTVComShowIE from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE -from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE from .hollywoodreporter import ( HollywoodReporterIE, @@ -794,8 +745,6 @@ HotStarSeasonIE, HotStarSeriesIE, ) -from .howcast import HowcastIE -from .howstuffworks import HowStuffWorksIE from .hrefli import HrefLiRedirectIE from .hrfensehen import HRFernsehenIE from .hrti import ( @@ -927,7 +876,6 @@ from .kankanews import KankaNewsIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE -from .keezmovies import KeezMoviesIE from .kelbyone import KelbyOneIE from .khanacademy import ( KhanAcademyIE, @@ -962,12 +910,6 @@ LA7PodcastEpisodeIE, LA7PodcastIE, ) -from 
.laola1tv import ( - Laola1TvEmbedIE, - Laola1TvIE, - EHFTVIE, - ITTFIE, -) from .lastfm import ( LastFMIE, LastFMPlaylistIE, @@ -1022,7 +964,6 @@ LinkedInLearningIE, LinkedInLearningCourseIE, ) -from .linuxacademy import LinuxAcademyIE from .liputan6 import Liputan6IE from .listennotes import ListenNotesIE from .litv import LiTVIE @@ -1050,7 +991,6 @@ LyndaIE, LyndaCourseIE ) -from .m6 import M6IE from .magellantv import MagellanTVIE from .magentamusik360 import MagentaMusik360IE from .mailru import ( @@ -1101,10 +1041,7 @@ from .megaphone import MegaphoneIE from .meipai import MeipaiIE from .melonvod import MelonVODIE -from .meta import METAIE -from .metacafe import MetacafeIE from .metacritic import MetacriticIE -from .mgoon import MgoonIE from .mgtv import MGTVIE from .miaopai import MiaoPaiIE from .microsoftstream import MicrosoftStreamIE @@ -1126,7 +1063,6 @@ ) from .ministrygrid import MinistryGridIE from .minoto import MinotoIE -from .miomio import MioMioIE from .mirrativ import ( MirrativIE, MirrativUserIE, @@ -1150,13 +1086,7 @@ MLBArticleIE, ) from .mlssoccer import MLSSoccerIE -from .mnet import MnetIE from .mocha import MochaVideoIE -from .moevideo import MoeVideoIE -from .mofosex import ( - MofosexIE, - MofosexEmbedIE, -) from .mojvideo import MojvideoIE from .monstercat import MonstercatIE from .morningstar import MorningstarIE @@ -1166,7 +1096,6 @@ MotherlessGalleryIE, ) from .motorsport import MotorsportIE -from .movieclips import MovieClipsIE from .moviepilot import MoviepilotIE from .moview import MoviewPlayIE from .moviezine import MoviezineIE @@ -1191,18 +1120,12 @@ MusicdexArtistIE, MusicdexPlaylistIE, ) -from .mwave import MwaveIE, MwaveMeetGreetIE from .mxplayer import ( MxplayerIE, MxplayerShowIE, ) -from .mychannels import MyChannelsIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE -from .myvi import ( - MyviIE, - MyviEmbedIE, -) from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE from 
.mzaalo import MzaaloIE @@ -1278,7 +1201,6 @@ NewgroundsUserIE, ) from .newspicks import NewsPicksIE -from .newstube import NewstubeIE from .newsy import NewsyIE from .nextmedia import ( NextMediaIE, @@ -1313,7 +1235,6 @@ NickIE, NickBrIE, NickDeIE, - NickNightIE, NickRuIE, ) from .niconico import ( @@ -1346,8 +1267,6 @@ from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE from .noovo import NoovoIE -from .normalboots import NormalbootsIE -from .nosvideo import NosVideoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, @@ -1422,10 +1341,6 @@ OnetPlIE, ) from .onionstudios import OnionStudiosIE -from .ooyala import ( - OoyalaIE, - OoyalaExternalIE, -) from .opencast import ( OpencastIE, OpencastPlaylistIE, @@ -1454,7 +1369,6 @@ PalcoMP3ArtistIE, PalcoMP3VideoIE, ) -from .pandoratv import PandoraTVIE from .panopto import ( PanoptoIE, PanoptoListIE, @@ -1482,7 +1396,6 @@ PelotonIE, PelotonLiveIE ) -from .people import PeopleIE from .performgroup import PerformGroupIE from .periscope import ( PeriscopeIE, @@ -1514,13 +1427,10 @@ PlatziIE, PlatziCourseIE, ) -from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE -from .plays import PlaysTVIE from .playstuff import PlayStuffIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE -from .playvid import PlayvidIE from .playwire import PlaywireIE from .plutotv import PlutoTVIE from .pluralsight import ( @@ -1552,9 +1462,7 @@ from .popcorntv import PopcornTVIE from .porn91 import Porn91IE from .pornbox import PornboxIE -from .porncom import PornComIE from .pornflip import PornFlipIE -from .pornhd import PornHdIE from .pornhub import ( PornHubIE, PornHubUserIE, @@ -1565,7 +1473,6 @@ from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE -from .pornez import PornezIE from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, @@ -1609,7 +1516,6 @@ ) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE 
-from .radiobremen import RadioBremenIE from .radiofrance import ( FranceCultureIE, RadioFranceIE, @@ -1661,7 +1567,6 @@ RCTIPlusTVIE, ) from .rds import RDSIE -from .recurbate import RecurbateIE from .redbee import ParliamentLiveUKIE, RTBFIE from .redbulltv import ( RedBullTVIE, @@ -1685,7 +1590,6 @@ from .reuters import ReutersIE from .reverbnation import ReverbNationIE from .rheinmaintv import RheinMainTVIE -from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( @@ -1709,11 +1613,7 @@ RTLLuLiveIE, RTLLuRadioIE, ) -from .rtl2 import ( - RTL2IE, - RTL2YouIE, - RTL2YouSeriesIE, -) +from .rtl2 import RTL2IE from .rtnews import ( RTNewsIE, RTDocumentryIE, @@ -1735,10 +1635,8 @@ RTVEInfantilIE, RTVETelevisionIE, ) -from .rtvnh import RTVNHIE from .rtvs import RTVSIE from .rtvslo import RTVSLOIE -from .ruhd import RUHDIE from .rule34video import Rule34VideoIE from .rumble import ( RumbleEmbedIE, @@ -1820,10 +1718,6 @@ ShahidIE, ShahidShowIE, ) -from .shared import ( - SharedIE, - VivoIE, -) from .sharevideos import ShareVideosEmbedIE from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE @@ -1901,7 +1795,6 @@ SpankBangIE, SpankBangPlaylistIE, ) -from .spankwire import SpankwireIE from .spiegel import SpiegelIE from .spike import ( BellatorIE, @@ -1951,7 +1844,6 @@ StoryFireSeriesIE, ) from .streamable import StreamableIE -from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streamff import StreamFFIE from .streetvoice import StreetVoiceIE @@ -1971,7 +1863,6 @@ SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE -from .swrmediathek import SWRMediathekIE from .syvdk import SYVDKIE from .syfy import SyfyIE from .sztvhu import SztvHuIE @@ -1998,7 +1889,6 @@ ConanClassicIE, ) from .teamtreehouse import TeamTreeHouseIE -from .techtalks import TechTalksIE from .ted import ( TedEmbedIE, TedPlaylistIE, @@ -2075,7 +1965,6 @@ TikTokLiveIE, DouyinIE, ) -from 
.tinypic import TinyPicIE from .tmz import TMZIE from .tnaflix import ( TNAFlixNetworkEmbedIE, @@ -2090,10 +1979,6 @@ from .toggo import ( ToggoIE, ) -from .tokentube import ( - TokentubeIE, - TokentubeChannelIE -) from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE @@ -2104,7 +1989,6 @@ TrillerUserIE, TrillerShortIE, ) -from .trilulilu import TriluliluIE from .trovo import ( TrovoIE, TrovoVodIE, @@ -2129,7 +2013,6 @@ TuneInPodcastEpisodeIE, TuneInShortenerIE, ) -from .tunepk import TunePkIE from .turbo import TurboIE from .tv2 import ( TV2IE, @@ -2171,16 +2054,7 @@ from .tviplayer import TVIPlayerIE from .tvland import TVLandIE from .tvn24 import TVN24IE -from .tvnet import TVNetIE from .tvnoe import TVNoeIE -from .tvnow import ( - TVNowIE, - TVNowFilmIE, - TVNowNewIE, - TVNowSeasonIE, - TVNowAnnualIE, - TVNowShowIE, -) from .tvopengr import ( TVOpenGrWatchIE, TVOpenGrEmbedIE, @@ -2198,7 +2072,6 @@ ) from .tvplayer import TVPlayerIE from .tweakers import TweakersIE -from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE from .twitcasting import ( @@ -2247,7 +2120,6 @@ from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE -from .unscripted import UnscriptedNewsVideoIE from .unsupported import KnownDRMIE, KnownPiracyIE from .uol import UOLIE from .uplynk import ( @@ -2266,7 +2138,6 @@ from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE -from .veehd import VeeHDIE from .veo import VeoIE from .veoh import ( VeohIE, @@ -2288,7 +2159,6 @@ ViceArticleIE, ViceShowIE, ) -from .vidbit import VidbitIE from .viddler import ViddlerIE from .videa import VideaIE from .videocampus_sachsen import ( @@ -2339,7 +2209,6 @@ VimmIE, VimmRecordingIE, ) -from .vimple import VimpleIE from .vine import ( VineIE, VineUserIE, @@ -2363,10 +2232,8 @@ VKPlayLiveIE, ) from .vocaroo import VocarooIE -from 
.vodlocker import VodlockerIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE -from .voicerepublic import VoiceRepublicIE from .voicy import ( VoicyIE, VoicyChannelIE, @@ -2386,23 +2253,13 @@ KetnetIE, DagelijkseKostIE, ) -from .vrak import VrakIE -from .vrv import ( - VRVIE, - VRVSeriesIE, -) -from .vshare import VShareIE from .vtm import VTMIE from .medialaan import MedialaanIE from .vuclip import VuClipIE -from .vupload import VuploadIE from .vvvvid import ( VVVVIDIE, VVVVIDShowIE, ) -from .vyborymos import VyboryMosIE -from .vzaar import VzaarIE -from .wakanim import WakanimIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, @@ -2414,8 +2271,6 @@ WASDTVClipIE, ) from .wat import WatIE -from .watchbox import WatchBoxIE -from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, WDRPageIE, @@ -2449,7 +2304,6 @@ from .weyyak import WeyyakIE from .whyp import WhypIE from .wikimedia import WikimediaIE -from .willow import WillowIE from .wimbledon import WimbledonIE from .wimtv import WimTVIE from .whowatch import WhoWatchIE @@ -2483,7 +2337,6 @@ WykopPostCommentIE, ) from .xanimu import XanimuIE -from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE from .xhamster import ( @@ -2499,8 +2352,6 @@ from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE -from .xtube import XTubeUserIE, XTubeIE -from .xuite import XuiteIE from .xvideos import ( XVideosIE, XVideosQuickiesIE @@ -2530,10 +2381,7 @@ YappyIE, YappyProfileIE, ) -from .yesjapan import YesJapanIE -from .yinyuetai import YinYueTaiIE from .yle_areena import YleAreenaIE -from .ynet import YnetIE from .youjizz import YouJizzIE from .youku import ( YoukuIE, diff --git a/yt_dlp/extractor/airmozilla.py b/yt_dlp/extractor/airmozilla.py deleted file mode 100644 index 669556b98..000000000 --- a/yt_dlp/extractor/airmozilla.py +++ /dev/null @@ -1,63 +0,0 @@ -import re - -from .common import 
InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601, -) - - -class AirMozillaIE(InfoExtractor): - _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?' - _TEST = { - 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/', - 'md5': '8d02f53ee39cf006009180e21df1f3ba', - 'info_dict': { - 'id': '6x4q2w', - 'ext': 'mp4', - 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', - 'thumbnail': r're:https?://.*/poster\.jpg', - 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', - 'timestamp': 1422487800, - 'upload_date': '20150128', - 'location': 'SFO Commons', - 'duration': 3780, - 'view_count': int, - 'categories': ['Main', 'Privacy'], - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id') - - embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) - jwconfig = self._parse_json(self._search_regex( - r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config'] - - info_dict = self._parse_jwplayer_data(jwconfig, video_id) - view_count = int_or_none(self._html_search_regex( - r'Views since archived: ([0-9]+)', - webpage, 'view count', fatal=False)) - timestamp = parse_iso8601(self._html_search_regex( - r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False)) - duration = parse_duration(self._search_regex( - r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)', - webpage, 'duration', fatal=False)) - - info_dict.update({ - 'id': video_id, - 'title': self._og_search_title(webpage), - 'url': self._og_search_url(webpage), - 'display_id': display_id, - 'description': self._og_search_description(webpage), - 'timestamp': timestamp, - 
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None), - 'duration': duration, - 'view_count': view_count, - 'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage), - }) - - return info_dict diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 6949ca974..455f66795 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -10,6 +10,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE + _WORKING = False IE_NAME = 'aol.com' _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})' diff --git a/yt_dlp/extractor/atttechchannel.py b/yt_dlp/extractor/atttechchannel.py deleted file mode 100644 index 6ff4ec0ad..000000000 --- a/yt_dlp/extractor/atttechchannel.py +++ /dev/null @@ -1,53 +0,0 @@ -from .common import InfoExtractor -from ..utils import unified_strdate - - -class ATTTechChannelIE(InfoExtractor): - _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)' - _TEST = { - 'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use', - 'info_dict': { - 'id': '11316', - 'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use', - 'ext': 'flv', - 'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use', - 'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20140127', - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - video_url = self._search_regex( - r"url\s*:\s*'(rtmp://[^']+)'", - webpage, 'video URL') - - video_id = self._search_regex( - r'mediaid\s*=\s*(\d+)', - webpage, 'video id', 
fatal=False) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - upload_date = unified_strdate(self._search_regex( - r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})', - webpage, 'upload date', fatal=False), False) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'ext': 'flv', - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - } diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index ca4498150..9d2324f4f 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -3,6 +3,7 @@ class BehindKinkIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' _TEST = { 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index 6b867d135..cbf3dd082 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -1,10 +1,9 @@ from .mtv import MTVServicesInfoExtractor from ..utils import unified_strdate -# TODO Remove - Reason: Outdated Site - class BetIE(MTVServicesInfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' _TESTS = [ { diff --git a/yt_dlp/extractor/bfi.py b/yt_dlp/extractor/bfi.py index 76f0516a4..a6ebfedff 100644 --- a/yt_dlp/extractor/bfi.py +++ b/yt_dlp/extractor/bfi.py @@ -5,6 +5,7 @@ class BFIPlayerIE(InfoExtractor): + _WORKING = False IE_NAME = 'bfi:player' _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online' _TEST = { diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py deleted file mode 100644 index 027753503..000000000 --- a/yt_dlp/extractor/biqle.py +++ /dev/null @@ -1,110 +0,0 @@ -from .common import InfoExtractor -from 
.vk import VKIE -from ..compat import compat_b64decode -from ..utils import ( - int_or_none, - js_to_json, - traverse_obj, - unified_timestamp, -) - - -class BIQLEIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)' - _TESTS = [{ - 'url': 'https://biqle.ru/watch/-2000421746_85421746', - 'md5': 'ae6ef4f04d19ac84e4658046d02c151c', - 'info_dict': { - 'id': '-2000421746_85421746', - 'ext': 'mp4', - 'title': 'Forsaken By Hope Studio Clip', - 'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн', - 'upload_date': '19700101', - 'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb', - 'timestamp': 0, - }, - }, { - 'url': 'http://biqle.org/watch/-44781847_168547604', - 'md5': '7f24e72af1db0edf7c1aaba513174f97', - 'info_dict': { - 'id': '-44781847_168547604', - 'ext': 'mp4', - 'title': 'Ребенок в шоке от автоматической мойки', - 'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн', - 'timestamp': 1396633454, - 'upload_date': '20140404', - 'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_meta('name', webpage, 'Title', fatal=False) - timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None)) - description = self._html_search_meta('description', webpage, 'Description', default=None) - - global_embed_url = self._search_regex( - r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'', - webpage, 'global Embed url') - hash = self._search_regex( - r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash') - - embed_url = global_embed_url + hash - - 
if VKIE.suitable(embed_url): - return self.url_result(embed_url, VKIE.ie_key(), video_id) - - embed_page = self._download_webpage( - embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url}) - - glob_params = self._parse_json(self._search_regex( - r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>', - embed_page, 'Global Parameters'), video_id, transform_source=js_to_json) - host_name = compat_b64decode(glob_params['server'][::-1]).decode() - - item = self._download_json( - f'https://{host_name}/method/video.get/{video_id}', video_id, - headers={'Referer': url}, query={ - 'token': glob_params['video']['access_token'], - 'videos': video_id, - 'ckey': glob_params['c_key'], - 'credentials': glob_params['video']['credentials'], - })['response']['items'][0] - - formats = [] - for f_id, f_url in item.get('files', {}).items(): - if f_id == 'external': - return self.url_result(f_url) - ext, height = f_id.split('_') - height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height)) - if height_extra_key: - formats.append({ - 'format_id': f'{height}p', - 'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}', - 'height': int_or_none(height), - 'ext': ext, - }) - - thumbnails = [] - for k, v in item.items(): - if k.startswith('photo_') and v: - width = k.replace('photo_', '') - thumbnails.append({ - 'id': width, - 'url': v, - 'width': int_or_none(width), - }) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'comment_count': int_or_none(item.get('comments')), - 'description': description, - 'duration': int_or_none(item.get('duration')), - 'thumbnails': thumbnails, - 'timestamp': timestamp, - 'view_count': int_or_none(item.get('views')), - } diff --git a/yt_dlp/extractor/bitwave.py b/yt_dlp/extractor/bitwave.py deleted file mode 100644 index a82cd263a..000000000 --- a/yt_dlp/extractor/bitwave.py +++ /dev/null @@ -1,58 +0,0 @@ -from .common import InfoExtractor 
- - -class BitwaveReplayIE(InfoExtractor): - IE_NAME = 'bitwave:replay' - _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$' - _TEST = { - 'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr', - 'only_matching': True - } - - def _real_extract(self, url): - replay_id = self._match_id(url) - replay = self._download_json( - 'https://api.bitwave.tv/v1/replays/' + replay_id, - replay_id - ) - - return { - 'id': replay_id, - 'title': replay['data']['title'], - 'uploader': replay['data']['name'], - 'uploader_id': replay['data']['name'], - 'url': replay['data']['url'], - 'thumbnails': [ - {'url': x} for x in replay['data']['thumbnails'] - ], - } - - -class BitwaveStreamIE(InfoExtractor): - IE_NAME = 'bitwave:stream' - _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$' - _TEST = { - 'url': 'https://bitwave.tv/doomtube', - 'only_matching': True - } - - def _real_extract(self, url): - username = self._match_id(url) - channel = self._download_json( - 'https://api.bitwave.tv/v1/channels/' + username, - username) - - formats = self._extract_m3u8_formats( - channel['data']['url'], username, - 'mp4') - - return { - 'id': username, - 'title': channel['data']['title'], - 'uploader': username, - 'uploader_id': username, - 'formats': formats, - 'thumbnail': channel['data']['thumbnail'], - 'is_live': True, - 'view_count': channel['data']['viewCount'] - } diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index 8d8fabe33..5e5155af2 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -22,7 +22,7 @@ class BleacherReportIE(InfoExtractor): 'upload_date': '20150615', 'uploader': 'Team Stream Now ', }, - 'add_ie': ['Ooyala'], + 'skip': 'Video removed', }, { 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo', 'md5': '6a5cd403418c7b01719248ca97fb0692', @@ -70,8 +70,6 @@ def 
_real_extract(self, url): video_type = video['type'] if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] - elif video_type == 'ooyala.com': - info['url'] = 'ooyala:%s' % video['id'] elif video_type == 'youtube.com': info['url'] = video['id'] elif video_type == 'vine.co': diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 309452d23..6e1c63e2b 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -1,18 +1,15 @@ -import json - from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, int_or_none, parse_duration, - parse_iso8601, xpath_element, xpath_text, ) class BRIE(InfoExtractor): + _WORKING = False IE_DESC = 'Bayerischer Rundfunk' _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' @@ -167,142 +164,3 @@ def _extract_thumbnails(self, variants, base_url): } for variant in variants.findall('variant') if xpath_text(variant, 'url')] thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) return thumbnails - - -class BRMediathekIE(InfoExtractor): - IE_DESC = 'Bayerischer Rundfunk Mediathek' - _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek//?video/(?:[^/?&#]+?-)?(?P<id>av:[0-9a-f]{24})' - - _TESTS = [{ - 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e', - 'md5': 'fdc3d485835966d1622587d08ba632ec', - 'info_dict': { - 'id': 'av:5a1e6a6e8fce6d001871cc8e', - 'ext': 'mp4', - 'title': 'Die Sendung vom 28.11.2017', - 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc', - 'timestamp': 1511942766, - 'upload_date': '20171129', - } - }, { - 'url': 'https://www.br.de/mediathek//video/av:61b0db581aed360007558c12', - 'only_matching': True, - }] - - def _real_extract(self, url): - clip_id = self._match_id(url) - - clip = self._download_json( - 'https://proxy-base.master.mango.express/graphql', - 
clip_id, data=json.dumps({ - "query": """{ - viewer { - clip(id: "%s") { - title - description - duration - createdAt - ageRestriction - videoFiles { - edges { - node { - publicLocation - fileSize - videoProfile { - width - height - bitrate - encoding - } - } - } - } - captionFiles { - edges { - node { - publicLocation - } - } - } - teaserImages { - edges { - node { - imageFiles { - edges { - node { - publicLocation - width - height - } - } - } - } - } - } - } - } -}""" % clip_id}).encode(), headers={ - 'Content-Type': 'application/json', - })['data']['viewer']['clip'] - title = clip['title'] - - formats = [] - for edge in clip.get('videoFiles', {}).get('edges', []): - node = edge.get('node', {}) - n_url = node.get('publicLocation') - if not n_url: - continue - ext = determine_ext(n_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - n_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - video_profile = node.get('videoProfile', {}) - tbr = int_or_none(video_profile.get('bitrate')) - format_id = 'http' - if tbr: - format_id += '-%d' % tbr - formats.append({ - 'format_id': format_id, - 'url': n_url, - 'width': int_or_none(video_profile.get('width')), - 'height': int_or_none(video_profile.get('height')), - 'tbr': tbr, - 'filesize': int_or_none(node.get('fileSize')), - }) - - subtitles = {} - for edge in clip.get('captionFiles', {}).get('edges', []): - node = edge.get('node', {}) - n_url = node.get('publicLocation') - if not n_url: - continue - subtitles.setdefault('de', []).append({ - 'url': n_url, - }) - - thumbnails = [] - for edge in clip.get('teaserImages', {}).get('edges', []): - for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []): - node = image_edge.get('node', {}) - n_url = node.get('publicLocation') - if not n_url: - continue - thumbnails.append({ - 'url': n_url, - 'width': int_or_none(node.get('width')), - 'height': int_or_none(node.get('height')), - }) - - return { - 'id': clip_id, - 
'title': title, - 'description': clip.get('description'), - 'duration': int_or_none(clip.get('duration')), - 'timestamp': parse_iso8601(clip.get('createdAt')), - 'age_limit': int_or_none(clip.get('ageRestriction')), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnails': thumbnails, - } diff --git a/yt_dlp/extractor/breakcom.py b/yt_dlp/extractor/breakcom.py deleted file mode 100644 index 00cf308c7..000000000 --- a/yt_dlp/extractor/breakcom.py +++ /dev/null @@ -1,86 +0,0 @@ -from .common import InfoExtractor -from .youtube import YoutubeIE -from ..utils import ( - int_or_none, - url_or_none, -) - - -class BreakIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' - _TESTS = [{ - 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', - 'info_dict': { - 'id': '2468056', - 'ext': 'mp4', - 'title': 'When Girls Act Like D-Bags', - 'age_limit': 13, - }, - }, { - # youtube embed - 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', - 'info_dict': { - 'id': 'RrrDLdeL2HQ', - 'ext': 'mp4', - 'title': 'Whale Watching Boat Crashing Into San Diego Dock', - 'description': 'md5:afc1b2772f0a8468be51dd80eb021069', - 'upload_date': '20160331', - 'uploader': 'Steve Holden', - 'uploader_id': 'sdholden07', - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id, video_id = self._match_valid_url(url).groups() - - webpage = self._download_webpage(url, display_id) - - youtube_url = YoutubeIE._extract_url(webpage) - if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) - - content = self._parse_json( - self._search_regex( - r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, - 'content'), - display_id) - - formats = [] - for video in content: - video_url = url_or_none(video.get('url')) - if not video_url: - continue - 
bitrate = int_or_none(self._search_regex( - r'(\d+)_kbps', video_url, 'tbr', default=None)) - formats.append({ - 'url': video_url, - 'format_id': 'http-%d' % bitrate if bitrate else 'http', - 'tbr': bitrate, - }) - - title = self._search_regex( - (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', - r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value') - - def get(key, name): - return int_or_none(self._search_regex( - r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name, - default=None)) - - age_limit = get('ratings', 'age limit') - video_id = video_id or get('pid', 'video id') or display_id - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), - 'age_limit': age_limit, - 'formats': formats, - } diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index 9ed6efe79..ad35427ed 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -8,9 +8,9 @@ class BYUtvIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' 
_TESTS = [{ - # ooyalaVOD 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', @@ -24,7 +24,6 @@ class BYUtvIE(InfoExtractor): 'params': { 'skip_download': True, }, - 'add_ie': ['Ooyala'], }, { # dvr 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2', @@ -63,19 +62,6 @@ def _real_extract(self, url): 'x-byutv-platformkey': 'xsaaw9c7y5', }) - ep = video.get('ooyalaVOD') - if ep: - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], - 'id': video_id, - 'display_id': display_id, - 'title': ep.get('title'), - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - } - info = {} formats = [] subtitles = {} diff --git a/yt_dlp/extractor/camwithher.py b/yt_dlp/extractor/camwithher.py deleted file mode 100644 index a0b3749ed..000000000 --- a/yt_dlp/extractor/camwithher.py +++ /dev/null @@ -1,87 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - unified_strdate, -) - - -class CamWithHerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)' - - _TESTS = [{ - 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=', - 'info_dict': { - 'id': '5644', - 'ext': 'flv', - 'title': 'Periscope Tease', - 'description': 'In the clouds teasing on periscope to my favorite song', - 'duration': 240, - 'view_count': int, - 'comment_count': int, - 'uploader': 'MileenaK', - 'upload_date': '20160322', - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937', - 'only_matching': True, - }, { - 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=', - 
'only_matching': True, - }, { - 'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - flv_id = self._html_search_regex( - r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id') - - # Video URL construction algorithm is reverse-engineered from cwhplayer.swf - rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % ( - ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id) - - title = self._html_search_regex( - r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title') - description = self._html_search_regex( - r'>Description:</span>(.+?)</div>', webpage, 'description', default=None) - - runtime = self._search_regex( - r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None) - if runtime: - runtime = re.sub(r'[\s-]', '', runtime) - duration = parse_duration(runtime) - view_count = int_or_none(self._search_regex( - r'Views\s*:\s*(\d+)', webpage, 'view count', default=None)) - comment_count = int_or_none(self._search_regex( - r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None)) - - uploader = self._search_regex( - r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None) - upload_date = unified_strdate(self._search_regex( - r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None)) - - return { - 'id': flv_id, - 'url': rtmp_url, - 'ext': 'flv', - 'no_resume': True, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, - 'uploader': uploader, - 'upload_date': upload_date, - 'age_limit': 18 - } diff --git a/yt_dlp/extractor/carambatv.py b/yt_dlp/extractor/carambatv.py deleted file mode 100644 index d6044a319..000000000 --- a/yt_dlp/extractor/carambatv.py +++ /dev/null @@ -1,105 +0,0 @@ -from .common import InfoExtractor -from ..compat 
import compat_str -from ..utils import ( - format_field, - float_or_none, - int_or_none, - try_get, -) - -from .videomore import VideomoreIE - - -class CarambaTVIE(InfoExtractor): - _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://video1.carambatv.ru/v/191910501', - 'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a', - 'info_dict': { - 'id': '191910501', - 'ext': 'mp4', - 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 2678.31, - }, - }, { - 'url': 'carambatv:191910501', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id, - video_id) - - title = video['title'] - - base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id - - formats = [{ - 'url': base_url + f['fn'], - 'height': int_or_none(f.get('height')), - 'format_id': format_field(f, 'height', '%sp'), - } for f in video['qualities'] if f.get('fn')] - - thumbnail = video.get('splash') - duration = float_or_none(try_get( - video, lambda x: x['annotations'][0]['end_time'], compat_str)) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } - - -class CarambaTVPageIE(InfoExtractor): - _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/', - 'md5': 'a49fb0ec2ad66503eeb46aac237d3c86', - 'info_dict': { - 'id': '475222', - 'ext': 'flv', - 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', - 'thumbnail': r're:^https?://.*\.jpg', - # duration reported by videomore is incorrect - 'duration': int, - }, - 'add_ie': [VideomoreIE.ie_key()], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - 
videomore_url = VideomoreIE._extract_url(webpage) - if not videomore_url: - videomore_id = self._search_regex( - r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id', - default=None) - if videomore_id: - videomore_url = 'videomore:%s' % videomore_id - if videomore_url: - title = self._og_search_title(webpage) - return { - '_type': 'url_transparent', - 'url': videomore_url, - 'ie_key': VideomoreIE.ie_key(), - 'title': title, - } - - video_url = self._og_search_property('video:iframe', webpage, default=None) - - if not video_url: - video_id = self._search_regex( - r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)', - webpage, 'video id') - video_url = 'carambatv:%s' % video_id - - return self.url_result(video_url, CarambaTVIE.ie_key()) diff --git a/yt_dlp/extractor/channel9.py b/yt_dlp/extractor/channel9.py deleted file mode 100644 index a88474060..000000000 --- a/yt_dlp/extractor/channel9.py +++ /dev/null @@ -1,252 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - clean_html, - int_or_none, - parse_iso8601, - qualities, - unescapeHTML, -) - - -class Channel9IE(InfoExtractor): - IE_DESC = 'Channel 9' - IE_NAME = 'channel9' - _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' - _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b'] - - _TESTS = [{ - 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - 'md5': '32083d4eaf1946db6d454313f44510ca', - 'info_dict': { - 'id': '6c413323-383a-49dc-88f9-a22800cab024', - 'ext': 'wmv', - 'title': 'Developer Kick-Off Session: Stuff We Love', - 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731', - 'duration': 4576, - 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1377717420, - 'upload_date': '20130828', - 'session_code': 'KOS002', - 'session_room': 'Arena 1A', - 'session_speakers': 'count:5', - }, - }, { - 'url': 
'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc', - 'info_dict': { - 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024', - 'ext': 'wmv', - 'title': 'Self-service BI with Power BI - nuclear testing', - 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54', - 'duration': 1540, - 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1386381991, - 'upload_date': '20131207', - 'authors': ['Mike Wilmot'], - }, - }, { - # low quality mp4 is best - 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'info_dict': { - 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76', - 'ext': 'mp4', - 'title': 'Ranges for the Standard Library', - 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372', - 'duration': 5646, - 'thumbnail': r're:https?://.*\.jpg', - 'upload_date': '20150930', - 'timestamp': 1443640735, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', - 'info_dict': { - 'id': 'Events/DEVintersection/DEVintersection-2016', - 'title': 'DEVintersection 2016 Orlando Sessions', - }, - 'playlist_mincount': 14, - }, { - 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', - 'only_matching': True, - }, { - 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', - 'only_matching': True, - }] - - _RSS_URL = 'http://channel9.msdn.com/%s/RSS' - - def _extract_list(self, video_id, rss_url=None): - if not rss_url: - rss_url = self._RSS_URL % video_id - rss = self._download_xml(rss_url, video_id, 'Downloading RSS') - entries = [self.url_result(session_url.text, 'Channel9') - for session_url in rss.findall('./channel/item/link')] - title_text = rss.find('./channel/title').text - return self.playlist_result(entries, video_id, title_text) - - def _real_extract(self, url): - content_path, rss = 
self._match_valid_url(url).groups() - - if rss: - return self._extract_list(content_path, url) - - webpage = self._download_webpage( - url, content_path, 'Downloading web page') - - episode_data = self._search_regex( - r"data-episode='([^']+)'", webpage, 'episode data', default=None) - if episode_data: - episode_data = self._parse_json(unescapeHTML( - episode_data), content_path) - content_id = episode_data['contentId'] - is_session = '/Sessions(' in episode_data['api'] - content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,' - if is_session: - content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers' - else: - content_url += 'Authors,Body&$expand=Authors' - content_data = self._download_json(content_url, content_id) - title = content_data['Title'] - - QUALITIES = ( - 'mp3', - 'wmv', 'mp4', - 'wmv-low', 'mp4-low', - 'wmv-mid', 'mp4-mid', - 'wmv-high', 'mp4-high', - ) - - quality_key = qualities(QUALITIES) - - def quality(quality_id, format_url): - return (len(QUALITIES) if '_Source.' 
in format_url - else quality_key(quality_id)) - - formats = [] - urls = set() - - SITE_QUALITIES = { - 'MP3': 'mp3', - 'MP4': 'mp4', - 'Low Quality WMV': 'wmv-low', - 'Low Quality MP4': 'mp4-low', - 'Mid Quality WMV': 'wmv-mid', - 'Mid Quality MP4': 'mp4-mid', - 'High Quality WMV': 'wmv-high', - 'High Quality MP4': 'mp4-high', - } - - formats_select = self._search_regex( - r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage, - 'formats select', default=None) - if formats_select: - for mobj in re.finditer( - r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<', - formats_select): - format_url = mobj.group('url') - if format_url in urls: - continue - urls.add(format_url) - format_id = mobj.group('format') - quality_id = SITE_QUALITIES.get(format_id, format_id) - formats.append({ - 'url': format_url, - 'format_id': quality_id, - 'quality': quality(quality_id, format_url), - 'vcodec': 'none' if quality_id == 'mp3' else None, - }) - - API_QUALITIES = { - 'VideoMP4Low': 'mp4-low', - 'VideoWMV': 'wmv-mid', - 'VideoMP4Medium': 'mp4-mid', - 'VideoMP4High': 'mp4-high', - 'VideoWMVHQ': 'wmv-hq', - } - - for format_id, q in API_QUALITIES.items(): - q_url = content_data.get(format_id) - if not q_url or q_url in urls: - continue - urls.add(q_url) - formats.append({ - 'url': q_url, - 'format_id': q, - 'quality': quality(q, q_url), - }) - - slides = content_data.get('Slides') - zip_file = content_data.get('ZipFile') - - if not formats and not slides and not zip_file: - self.raise_no_formats( - 'None of recording, slides or zip are available for %s' % content_path) - - subtitles = {} - for caption in content_data.get('Captions', []): - caption_url = caption.get('Url') - if not caption_url: - continue - subtitles.setdefault(caption.get('Language', 'en'), []).append({ - 'url': caption_url, - 'ext': 'vtt', - }) - - common = { - 'id': content_id, - 'title': title, - 'description': clean_html(content_data.get('Description') or 
content_data.get('Body')), - 'thumbnail': content_data.get('VideoPlayerPreviewImage'), - 'duration': int_or_none(content_data.get('MediaLengthInSeconds')), - 'timestamp': parse_iso8601(content_data.get('PublishedDate')), - 'avg_rating': int_or_none(content_data.get('Rating')), - 'rating_count': int_or_none(content_data.get('RatingCount')), - 'view_count': int_or_none(content_data.get('Views')), - 'comment_count': int_or_none(content_data.get('CommentCount')), - 'subtitles': subtitles, - } - if is_session: - speakers = [] - for s in content_data.get('Speakers', []): - speaker_name = s.get('FullName') - if not speaker_name: - continue - speakers.append(speaker_name) - - common.update({ - 'session_code': content_data.get('Code'), - 'session_room': content_data.get('Room'), - 'session_speakers': speakers, - }) - else: - authors = [] - for a in content_data.get('Authors', []): - author_name = a.get('DisplayName') - if not author_name: - continue - authors.append(author_name) - common['authors'] = authors - - contents = [] - - if slides: - d = common.copy() - d.update({'title': title + '-Slides', 'url': slides}) - contents.append(d) - - if zip_file: - d = common.copy() - d.update({'title': title + '-Zip', 'url': zip_file}) - contents.append(d) - - if formats: - d = common.copy() - d.update({'title': title, 'formats': formats}) - contents.append(d) - return self.playlist_result(contents) - else: - return self._extract_list(content_path) diff --git a/yt_dlp/extractor/chirbit.py b/yt_dlp/extractor/chirbit.py deleted file mode 100644 index 452711d97..000000000 --- a/yt_dlp/extractor/chirbit.py +++ /dev/null @@ -1,88 +0,0 @@ -import re - -from .common import InfoExtractor -from ..compat import compat_b64decode -from ..utils import parse_duration - - -class ChirbitIE(InfoExtractor): - IE_NAME = 'chirbit' - _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)' - _TESTS = [{ - 'url': 'http://chirb.it/be2abG', - 'info_dict': 
{ - 'id': 'be2abG', - 'ext': 'mp3', - 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', - 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', - 'duration': 306, - 'uploader': 'Gerryaudio', - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', - 'only_matching': True, - }, { - 'url': 'https://chirb.it/wp/MN58c2', - 'only_matching': True, - }] - - def _real_extract(self, url): - audio_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://chirb.it/%s' % audio_id, audio_id) - - data_fd = self._search_regex( - r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1', - webpage, 'data fd', group='url') - - # Reverse engineered from https://chirb.it/js/chirbit.player.js (look - # for soundURL) - audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8') - - title = self._search_regex( - r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') - description = self._search_regex( - r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>', - webpage, 'description', default=None) - duration = parse_duration(self._search_regex( - r'class=["\']c-length["\'][^>]*>([^<]+)', - webpage, 'duration', fatal=False)) - uploader = self._search_regex( - r'id=["\']chirbit-username["\'][^>]*>([^<]+)', - webpage, 'uploader', fatal=False) - - return { - 'id': audio_id, - 'url': audio_url, - 'title': title, - 'description': description, - 'duration': duration, - 'uploader': uploader, - } - - -class ChirbitProfileIE(InfoExtractor): - IE_NAME = 'chirbit:profile' - _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)' - _TEST = { - 'url': 'http://chirbit.com/ScarletBeauty', - 'info_dict': { - 'id': 'ScarletBeauty', - }, - 'playlist_mincount': 3, - } - - def _real_extract(self, url): - profile_id = self._match_id(url) - - webpage = self._download_webpage(url, profile_id) - - entries = [ - self.url_result(self._proto_relative_url('//chirb.it/' + video_id)) - for _, video_id in 
re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)] - - return self.playlist_result(entries, profile_id) diff --git a/yt_dlp/extractor/cinchcast.py b/yt_dlp/extractor/cinchcast.py deleted file mode 100644 index 7a7ea8b22..000000000 --- a/yt_dlp/extractor/cinchcast.py +++ /dev/null @@ -1,56 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - unified_strdate, - xpath_text, -) - - -class CinchcastIE(InfoExtractor): - _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)' - _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1'] - - _TESTS = [{ - 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single', - 'info_dict': { - 'id': '5258197', - 'ext': 'mp3', - 'title': 'Train Your Brain to Up Your Game with Coach Mandy', - 'upload_date': '20130816', - }, - }, { - # Actual test is run in generic, look for undergroundwellness - 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - doc = self._download_xml( - 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, - video_id) - - item = doc.find('.//item') - title = xpath_text(item, './title', fatal=True) - date_str = xpath_text( - item, './{http://developer.longtailvideo.com/trac/}date') - upload_date = unified_strdate(date_str, day_first=False) - # duration is present but wrong - formats = [{ - 'format_id': 'main', - 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'], - }] - backup_url = xpath_text( - item, './{http://developer.longtailvideo.com/trac/}backupContent') - if backup_url: - formats.append({ - 'preference': 2, # seems to be more reliable - 'format_id': 'backup', - 'url': backup_url, - }) - - return { - 'id': video_id, - 'title': title, - 'upload_date': upload_date, - 'formats': formats, - } diff 
--git a/yt_dlp/extractor/clipsyndicate.py b/yt_dlp/extractor/clipsyndicate.py deleted file mode 100644 index 606444321..000000000 --- a/yt_dlp/extractor/clipsyndicate.py +++ /dev/null @@ -1,52 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - find_xpath_attr, - fix_xml_ampersands -) - - -class ClipsyndicateIE(InfoExtractor): - _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' - - _TESTS = [{ - 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', - 'md5': '4d7d549451bad625e0ff3d7bd56d776c', - 'info_dict': { - 'id': '4629301', - 'ext': 'mp4', - 'title': 'Brick Briscoe', - 'duration': 612, - 'thumbnail': r're:^https?://.+\.jpg', - }, - }, { - 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - js_player = self._download_webpage( - 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id, - video_id, 'Downlaoding player') - # it includes a required token - flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars') - - pdoc = self._download_xml( - 'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, - video_id, 'Downloading video info', - transform_source=fix_xml_ampersands) - - track_doc = pdoc.find('trackList/track') - - def find_param(name): - node = find_xpath_attr(track_doc, './/param', 'name', name) - if node is not None: - return node.attrib['value'] - - return { - 'id': video_id, - 'title': find_param('title'), - 'url': track_doc.find('location').text, - 'thumbnail': find_param('thumbnail'), - 'duration': int(find_param('duration')), - } diff --git a/yt_dlp/extractor/cloudy.py b/yt_dlp/extractor/cloudy.py deleted file mode 100644 index 848643e26..000000000 --- a/yt_dlp/extractor/cloudy.py +++ /dev/null @@ -1,57 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - str_to_int, - unified_strdate, -) - - -class 
CloudyIE(InfoExtractor): - _IE_DESC = 'cloudy.ec' - _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' - _TESTS = [{ - 'url': 'https://www.cloudy.ec/v/af511e2527aac', - 'md5': '29832b05028ead1b58be86bf319397ca', - 'info_dict': { - 'id': 'af511e2527aac', - 'ext': 'mp4', - 'title': 'Funny Cats and Animals Compilation june 2013', - 'upload_date': '20130913', - 'view_count': int, - } - }, { - 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.cloudy.ec/embed.php', video_id, query={ - 'id': video_id, - 'playerPage': 1, - 'autoplay': 1, - }) - - info = self._parse_html5_media_entries(url, webpage, video_id)[0] - - webpage = self._download_webpage( - 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) - - if webpage: - info.update({ - 'title': self._search_regex( - r'<h\d[^>]*>([^<]+)<', webpage, 'title'), - 'upload_date': unified_strdate(self._search_regex( - r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, - 'upload date', fatal=False)), - 'view_count': str_to_int(self._search_regex( - r'([\d,.]+) views<', webpage, 'view count', fatal=False)), - }) - - if not info.get('title'): - info['title'] = video_id - - info['id'] = video_id - - return info diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py index 403e44aaf..716f25969 100644 --- a/yt_dlp/extractor/clubic.py +++ b/yt_dlp/extractor/clubic.py @@ -6,6 +6,7 @@ class ClubicIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' _TESTS = [{ diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py index 8aed7708b..6359102aa 100644 --- a/yt_dlp/extractor/cmt.py +++ b/yt_dlp/extractor/cmt.py @@ -4,6 +4,7 @@ class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE + _WORKING = False IE_NAME = 'cmt.com' 
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)' diff --git a/yt_dlp/extractor/daftsex.py b/yt_dlp/extractor/daftsex.py deleted file mode 100644 index 92510c767..000000000 --- a/yt_dlp/extractor/daftsex.py +++ /dev/null @@ -1,150 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_b64decode -from ..utils import ( - ExtractorError, - int_or_none, - js_to_json, - parse_count, - parse_duration, - traverse_obj, - try_get, - unified_timestamp, -) - - -class DaftsexIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)' - _TESTS = [{ - 'url': 'https://daft.sex/watch/-35370899_456246186', - 'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd', - 'info_dict': { - 'id': '-35370899_456246186', - 'ext': 'mp4', - 'title': 'just relaxing', - 'description': 'just relaxing – Watch video Watch video in high quality', - 'upload_date': '20201113', - 'timestamp': 1605261911, - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18, - 'duration': 15.0, - 'view_count': int - }, - }, { - 'url': 'https://daft.sex/watch/-156601359_456242791', - 'info_dict': { - 'id': '-156601359_456242791', - 'ext': 'mp4', - 'title': 'Skye Blue - Dinner And A Show', - 'description': 'Skye Blue - Dinner And A Show - Watch video Watch video in high quality', - 'upload_date': '20200916', - 'timestamp': 1600250735, - 'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ', - }, - 'skip': 'deleted / private' - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('name', webpage, 'title') - timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None)) - description = self._html_search_meta('description', webpage, 'Description', default=None) - - duration = parse_duration(self._search_regex( - r'Duration: 
((?:[0-9]{2}:){0,2}[0-9]{2})', - webpage, 'duration', fatal=False)) - views = parse_count(self._search_regex( - r'Views: ([0-9 ]+)', - webpage, 'views', fatal=False)) - - player_hash = self._search_regex( - r'DaxabPlayer\.Init\({[\s\S]*hash:\s*"([0-9a-zA-Z_\-]+)"[\s\S]*}', - webpage, 'player hash') - player_color = self._search_regex( - r'DaxabPlayer\.Init\({[\s\S]*color:\s*"([0-9a-z]+)"[\s\S]*}', - webpage, 'player color', fatal=False) or '' - - embed_page = self._download_webpage( - 'https://dxb.to/player/%s?color=%s' % (player_hash, player_color), - video_id, headers={'Referer': url}) - video_params = self._parse_json( - self._search_regex( - r'window\.globParams\s*=\s*({[\S\s]+})\s*;\s*<\/script>', - embed_page, 'video parameters'), - video_id, transform_source=js_to_json) - - server_domain = 'https://%s' % compat_b64decode(video_params['server'][::-1]).decode('utf-8') - - cdn_files = traverse_obj(video_params, ('video', 'cdn_files')) or {} - if cdn_files: - formats = [] - for format_id, format_data in cdn_files.items(): - ext, height = format_id.split('_') - formats.append({ - 'format_id': format_id, - 'url': f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={format_data.split(".")[-1]}', - 'height': int_or_none(height), - 'ext': ext, - }) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'duration': duration, - 'thumbnail': try_get(video_params, lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')), - 'timestamp': timestamp, - 'view_count': views, - 'age_limit': 18, - } - - items = self._download_json( - f'{server_domain}/method/video.get/{video_id}', video_id, - headers={'Referer': url}, query={ - 'token': video_params['video']['access_token'], - 'videos': video_id, - 'ckey': video_params['c_key'], - 'credentials': video_params['video']['credentials'], - })['response']['items'] - - if not items: - raise ExtractorError('Video is not available', 
video_id=video_id, expected=True) - - item = items[0] - formats = [] - for f_id, f_url in item.get('files', {}).items(): - if f_id == 'external': - return self.url_result(f_url) - ext, height = f_id.split('_') - height_extra_key = traverse_obj(video_params, ('video', 'partial', 'quality', height)) - if height_extra_key: - formats.append({ - 'format_id': f'{height}p', - 'url': f'{server_domain}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}', - 'height': int_or_none(height), - 'ext': ext, - }) - - thumbnails = [] - for k, v in item.items(): - if k.startswith('photo_') and v: - width = k.replace('photo_', '') - thumbnails.append({ - 'id': width, - 'url': v, - 'width': int_or_none(width), - }) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'comment_count': int_or_none(item.get('comments')), - 'description': description, - 'duration': duration, - 'thumbnails': thumbnails, - 'timestamp': timestamp, - 'view_count': views, - 'age_limit': 18, - } diff --git a/yt_dlp/extractor/defense.py b/yt_dlp/extractor/defense.py deleted file mode 100644 index 7d73ea862..000000000 --- a/yt_dlp/extractor/defense.py +++ /dev/null @@ -1,37 +0,0 @@ -from .common import InfoExtractor - - -class DefenseGouvFrIE(InfoExtractor): - IE_NAME = 'defense.gouv.fr' - _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' - - _TEST = { - 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', - 'md5': '75bba6124da7e63d2d60b5244ec9430c', - 'info_dict': { - 'id': '11213', - 'ext': 'mp4', - 'title': 'attaque-chimique-syrienne-du-21-aout-2013-1' - } - } - - def _real_extract(self, url): - title = self._match_id(url) - webpage = self._download_webpage(url, title) - - video_id = self._search_regex( - r"flashvars.pvg_id=\"(\d+)\";", - webpage, 'ID') - - json_url = ( - 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % - video_id) - info 
= self._download_json(json_url, title, 'Downloading JSON config') - video_url = info['renditions'][0]['url'] - - return { - 'id': video_id, - 'ext': 'mp4', - 'url': video_url, - 'title': title, - } diff --git a/yt_dlp/extractor/dhm.py b/yt_dlp/extractor/dhm.py index 3d42fc2b0..a5f5f794c 100644 --- a/yt_dlp/extractor/dhm.py +++ b/yt_dlp/extractor/dhm.py @@ -3,6 +3,7 @@ class DHMIE(InfoExtractor): + _WORKING = False IE_DESC = 'Filmarchiv - Deutsches Historisches Museum' _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)' diff --git a/yt_dlp/extractor/dotsub.py b/yt_dlp/extractor/dotsub.py deleted file mode 100644 index 079f83750..000000000 --- a/yt_dlp/extractor/dotsub.py +++ /dev/null @@ -1,81 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, -) - - -class DotsubIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' - _TESTS = [{ - 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09', - 'md5': '21c7ff600f545358134fea762a6d42b6', - 'info_dict': { - 'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09', - 'ext': 'flv', - 'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever', - 'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6', - 'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p', - 'duration': 198, - 'uploader': 'liuxt', - 'timestamp': 1385778501.104, - 'upload_date': '20131130', - 'view_count': int, - } - }, { - 'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6', - 'md5': '2bb4a83896434d5c26be868c609429a3', - 'info_dict': { - 'id': '168006778', - 'ext': 'mp4', - 'title': 'Apartments and flats in Raipur the white symphony', - 'description': 'md5:784d0639e6b7d1bc29530878508e38fe', - 'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p', - 'duration': 290, - 'timestamp': 1476767794.2809999, - 'upload_date': '20161018', - 'uploader': 'parthivi001', - 
'uploader_id': 'user52596202', - 'view_count': int, - }, - 'add_ie': ['Vimeo'], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - info = self._download_json( - 'https://dotsub.com/api/media/%s/metadata' % video_id, video_id) - video_url = info.get('mediaURI') - - if not video_url: - webpage = self._download_webpage(url, video_id) - video_url = self._search_regex( - [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'], - webpage, 'video url', default=None) - info_dict = { - 'id': video_id, - 'url': video_url, - 'ext': 'flv', - } - - if not video_url: - setup_data = self._parse_json(self._html_search_regex( - r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1', - webpage, 'setup data', group='content'), video_id) - info_dict = { - '_type': 'url_transparent', - 'url': setup_data['src'], - } - - info_dict.update({ - 'title': info['title'], - 'description': info.get('description'), - 'thumbnail': info.get('screenshotURI'), - 'duration': int_or_none(info.get('duration'), 1000), - 'uploader': info.get('user'), - 'timestamp': float_or_none(info.get('dateCreated'), 1000), - 'view_count': int_or_none(info.get('numberOfViews')), - }) - - return info_dict diff --git a/yt_dlp/extractor/echomsk.py b/yt_dlp/extractor/echomsk.py deleted file mode 100644 index 850eabbff..000000000 --- a/yt_dlp/extractor/echomsk.py +++ /dev/null @@ -1,43 +0,0 @@ -import re - -from .common import InfoExtractor - - -class EchoMskIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.echo.msk.ru/sounds/1464134.html', - 'md5': '2e44b3b78daff5b458e4dbc37f191f7c', - 'info_dict': { - 'id': '1464134', - 'ext': 'mp3', - 'title': 'Особое мнение - 29 декабря 2014, 19:08', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - audio_url = self._search_regex( - r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL') - - title = 
self._html_search_regex( - r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>', - webpage, 'title') - - air_date = self._html_search_regex( - r'(?s)<div class="date">(.+?)</div>', - webpage, 'date', fatal=False, default=None) - - if air_date: - air_date = re.sub(r'(\s)\1+', r'\1', air_date) - if air_date: - title = '%s - %s' % (title, air_date) - - return { - 'id': video_id, - 'url': audio_url, - 'title': title, - } diff --git a/yt_dlp/extractor/ehow.py b/yt_dlp/extractor/ehow.py deleted file mode 100644 index 74469ce36..000000000 --- a/yt_dlp/extractor/ehow.py +++ /dev/null @@ -1,36 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote - - -class EHowIE(InfoExtractor): - IE_NAME = 'eHow' - _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html', - 'md5': '9809b4e3f115ae2088440bcb4efbf371', - 'info_dict': { - 'id': '12245069', - 'ext': 'flv', - 'title': 'Hardwood Flooring Basics', - 'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. 
Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...', - 'uploader': 'Erick Nathan', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_url = self._search_regex( - r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') - final_url = compat_urllib_parse_unquote(video_url) - uploader = self._html_search_meta('uploader', webpage) - title = self._og_search_title(webpage).replace(' | eHow', '') - - return { - 'id': video_id, - 'url': final_url, - 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), - 'description': self._og_search_description(webpage), - 'uploader': uploader, - } diff --git a/yt_dlp/extractor/elevensports.py b/yt_dlp/extractor/elevensports.py deleted file mode 100644 index 99c52b3a9..000000000 --- a/yt_dlp/extractor/elevensports.py +++ /dev/null @@ -1,59 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - parse_iso8601, - traverse_obj, - url_or_none, -) - - -class ElevenSportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?elevensports\.com/view/event/(?P<id>\w+)' - _TESTS = [{ - 'url': 'https://elevensports.com/view/event/clf46yr3kenn80jgrqsjmwefk', - 'md5': 'c0958d9ff90e4503a75544358758921d', - 'info_dict': { - 'id': 'clf46yr3kenn80jgrqsjmwefk', - 'title': 'Cleveland SC vs Lionsbridge FC', - 'ext': 'mp4', - 'description': 'md5:03b5238d6549f4ea1fddadf69b5e0b58', - 'upload_date': '20230323', - 'timestamp': 1679612400, - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - }, - 'params': {'skip_download': 'm3u8'} - }, { - 'url': 'https://elevensports.com/view/event/clhpyd53b06160jez74qhgkmf', - 'md5': 'c0958d9ff90e4503a75544358758921d', - 'info_dict': { - 'id': 'clhpyd53b06160jez74qhgkmf', - 'title': 'AJNLF vs ARRAF', - 'ext': 'mp4', - 'description': 'md5:c8c5e75c78f37c6d15cd6c475e43a8c1', - 'upload_date': '20230521', - 'timestamp': 1684684800, - 'thumbnail': 
r're:^https?://.*\.(?:jpg|png)', - }, - 'params': {'skip_download': 'm3u8'} - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - event_id = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['event']['mclsEventId'] - event_data = self._download_json( - f'https://mcls-api.mycujoo.tv/bff/events/v1beta1/{event_id}', video_id, - headers={'Authorization': 'Bearer FBVKACGN37JQC5SFA0OVK8KKSIOP153G'}) - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - event_data['streams'][0]['full_url'], video_id, 'mp4', m3u8_id='hls') - - return { - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - **traverse_obj(event_data, { - 'title': ('title', {str}), - 'description': ('description', {str}), - 'timestamp': ('start_time', {parse_iso8601}), - 'thumbnail': ('thumbnail_url', {url_or_none}), - }), - } diff --git a/yt_dlp/extractor/ellentube.py b/yt_dlp/extractor/ellentube.py deleted file mode 100644 index 6eb00f9c9..000000000 --- a/yt_dlp/extractor/ellentube.py +++ /dev/null @@ -1,130 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - clean_html, - extract_attributes, - float_or_none, - int_or_none, - try_get, -) - - -class EllenTubeBaseIE(InfoExtractor): - def _extract_data_config(self, webpage, video_id): - details = self._search_regex( - r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?></div>)', webpage, - 'details') - return self._parse_json( - extract_attributes(details)['data-config'], video_id) - - def _extract_video(self, data, video_id): - title = data['title'] - - formats = [] - duration = None - for entry in data.get('media'): - if entry.get('id') == 'm3u8': - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - entry['url'], video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - duration = int_or_none(entry.get('duration')) - break - - def get_insight(kind): - return int_or_none(try_get( - data, lambda x: x['insight']['%ss' 
% kind])) - - return { - 'extractor_key': EllenTubeIE.ie_key(), - 'id': video_id, - 'title': title, - 'description': data.get('description'), - 'duration': duration, - 'thumbnail': data.get('thumbnail'), - 'timestamp': float_or_none(data.get('publishTime'), scale=1000), - 'view_count': get_insight('view'), - 'like_count': get_insight('like'), - 'formats': formats, - 'subtitles': subtitles, - } - - -class EllenTubeIE(EllenTubeBaseIE): - _VALID_URL = r'''(?x) - (?: - ellentube:| - https://api-prod\.ellentube\.com/ellenapi/api/item/ - ) - (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}) - ''' - _TESTS = [{ - 'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3', - 'md5': '2fabc277131bddafdd120e0fc0f974c9', - 'info_dict': { - 'id': '0822171c-3829-43bf-b99f-d77358ae75e3', - 'ext': 'mp4', - 'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck', - 'description': 'md5:76e3355e2242a78ad9e3858e5616923f', - 'thumbnail': r're:^https?://.+?', - 'duration': 514, - 'timestamp': 1508505120, - 'upload_date': '20171020', - 'view_count': int, - 'like_count': int, - } - }, { - 'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - data = self._download_json( - 'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id, - video_id) - return self._extract_video(data, video_id) - - -class EllenTubeVideoIE(EllenTubeBaseIE): - _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+?)\.html' - _TEST = { - 'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html', - 'only_matching': True, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._extract_data_config(webpage, display_id)['id'] - return self.url_result( - 'ellentube:%s' % video_id, 
ie=EllenTubeIE.ie_key(), - video_id=video_id) - - -class EllenTubePlaylistIE(EllenTubeBaseIE): - _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P<id>.+?)\.html' - _TESTS = [{ - 'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html', - 'info_dict': { - 'id': 'dax-shepard-jordan-fisher-haim', - 'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM", - 'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c', - }, - 'playlist_count': 6, - }, { - 'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - data = self._extract_data_config(webpage, display_id)['data'] - feed = self._download_json( - 'https://api-prod.ellentube.com/ellenapi/api/feed/?%s' - % data['filter'], display_id) - entries = [ - self._extract_video(elem, elem['id']) - for elem in feed if elem.get('type') == 'VIDEO' and elem.get('id')] - return self.playlist_result( - entries, display_id, data.get('title'), - clean_html(data.get('description'))) diff --git a/yt_dlp/extractor/engadget.py b/yt_dlp/extractor/engadget.py deleted file mode 100644 index e7c5d7bf1..000000000 --- a/yt_dlp/extractor/engadget.py +++ /dev/null @@ -1,15 +0,0 @@ -from .common import InfoExtractor - - -class EngadgetIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)' - - _TESTS = [{ - # video with vidible ID - 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result('aol-video:%s' % video_id) diff --git a/yt_dlp/extractor/escapist.py b/yt_dlp/extractor/escapist.py deleted file mode 100644 index 85a1cbf40..000000000 --- a/yt_dlp/extractor/escapist.py +++ /dev/null @@ -1,108 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - 
determine_ext, - clean_html, - int_or_none, - float_or_none, -) - - -def _decrypt_config(key, string): - a = '' - i = '' - r = '' - - while len(a) < (len(string) / 2): - a += key - - a = a[0:int(len(string) / 2)] - - t = 0 - while t < len(string): - i += chr(int(string[t] + string[t + 1], 16)) - t += 2 - - icko = [s for s in i] - - for t, c in enumerate(a): - r += chr(ord(c) ^ ord(icko[t])) - - return r - - -class EscapistIE(InfoExtractor): - _VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', - 'md5': 'ab3a706c681efca53f0a35f1415cf0d1', - 'info_dict': { - 'id': '6618', - 'ext': 'mp4', - 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", - 'title': "Breaking Down Baldur's Gate", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 264, - 'uploader': 'The Escapist', - } - }, { - 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer', - 'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf', - 'info_dict': { - 'id': '10044', - 'ext': 'mp4', - 'description': 'This week, Zero Punctuation reviews Evolve.', - 'title': 'Evolve - One vs Multiplayer', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 304, - 'uploader': 'The Escapist', - } - }, { - 'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618', - 'only_matching': True, - }, { - 'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - ims_video = self._parse_json( - self._search_regex( - r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'), - video_id) - video_id = ims_video['videoID'] - key 
= ims_video['hash'] - - config = self._download_webpage( - 'http://www.escapistmagazine.com/videos/vidconfig.php', - video_id, 'Downloading video config', headers={ - 'Referer': url, - }, query={ - 'videoID': video_id, - 'hash': key, - }) - - data = self._parse_json(_decrypt_config(key, config), video_id) - - video_data = data['videoData'] - - title = clean_html(video_data['title']) - - formats = [{ - 'url': video['src'], - 'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']), - 'height': int_or_none(video.get('res')), - } for video in data['files']['videos']] - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'), - 'description': self._og_search_description(webpage), - 'duration': float_or_none(video_data.get('duration'), 1000), - 'uploader': video_data.get('publisher'), - 'series': video_data.get('show'), - } diff --git a/yt_dlp/extractor/esri.py b/yt_dlp/extractor/esri.py deleted file mode 100644 index 02e7efaf0..000000000 --- a/yt_dlp/extractor/esri.py +++ /dev/null @@ -1,70 +0,0 @@ -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - parse_filesize, - unified_strdate, -) - - -class EsriVideoIE(InfoExtractor): - _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications', - 'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc', - 'info_dict': { - 'id': '1124', - 'ext': 'mp4', - 'title': 'ArcGIS Online - Developing Applications', - 'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 185, - 'upload_date': '20120419', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - formats = [] - for width, height, content in re.findall( - 
r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage): - for video_url, ext, filesize in re.findall( - r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content): - formats.append({ - 'url': compat_urlparse.urljoin(url, video_url), - 'ext': ext.lower(), - 'format_id': '%s-%s' % (ext.lower(), height), - 'width': int(width), - 'height': int(height), - 'filesize_approx': parse_filesize(filesize), - }) - - title = self._html_search_meta('title', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description', fatal=False) - - thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False) - if thumbnail: - thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail) - - duration = int_or_none(self._search_regex( - [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"], - webpage, 'duration', fatal=False)) - - upload_date = unified_strdate(self._html_search_meta( - 'last-modified', webpage, 'upload date', fatal=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'upload_date': upload_date, - 'formats': formats - } diff --git a/yt_dlp/extractor/expotv.py b/yt_dlp/extractor/expotv.py deleted file mode 100644 index bda6e3cb2..000000000 --- a/yt_dlp/extractor/expotv.py +++ /dev/null @@ -1,74 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, -) - - -class ExpoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])' - _TEST = { - 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', - 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', - 'info_dict': { - 'id': '667916', - 'ext': 'mp4', - 'title': 'NYX Butter Lipstick Little Susie', - 'description': 'Goes on like butter, but looks better!', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Stephanie S.', - 'upload_date': '20150520', - 'view_count': int, - } - 
} - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - player_key = self._search_regex( - r'<param name="playerKey" value="([^"]+)"', webpage, 'player key') - config = self._download_json( - 'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key), - video_id, 'Downloading video configuration') - - formats = [] - for fcfg in config['sources']: - media_url = fcfg.get('file') - if not media_url: - continue - if fcfg.get('type') == 'm3u8': - formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')) - else: - formats.append({ - 'url': media_url, - 'height': int_or_none(fcfg.get('height')), - 'format_id': fcfg.get('label'), - 'ext': self._search_regex( - r'filename=.*\.([a-z0-9_A-Z]+)&', media_url, - 'file extension', default=None) or fcfg.get('type'), - }) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = config.get('image') - view_count = int_or_none(self._search_regex( - r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts')) - uploader = self._search_regex( - r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader', - fatal=False) - upload_date = unified_strdate(self._search_regex( - r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date', - fatal=False), day_first=False) - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'description': description, - 'view_count': view_count, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, - } diff --git a/yt_dlp/extractor/extremetube.py b/yt_dlp/extractor/extremetube.py deleted file mode 100644 index 2c1969899..000000000 --- a/yt_dlp/extractor/extremetube.py +++ /dev/null @@ -1,48 +0,0 @@ -from ..utils import str_to_int -from .keezmovies import KeezMoviesIE - - -class ExtremeTubeIE(KeezMoviesIE): # XXX: Do not subclass from concrete IE - _VALID_URL = 
r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)' - _TESTS = [{ - 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'md5': '92feaafa4b58e82f261e5419f39c60cb', - 'info_dict': { - 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'ext': 'mp4', - 'title': 'Music Video 14 british euro brit european cumshots swallow', - 'uploader': 'anonim', - 'view_count': int, - 'age_limit': 18, - } - }, { - 'url': 'http://www.extremetube.com/gay/video/abcde-1234', - 'only_matching': True, - }, { - 'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick', - 'only_matching': True, - }, { - 'url': 'http://www.extremetube.com/video/652431', - 'only_matching': True, - }] - - def _real_extract(self, url): - webpage, info = self._extract_info(url) - - if not info['title']: - info['title'] = self._search_regex( - r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title') - - uploader = self._html_search_regex( - r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>', - webpage, 'uploader', fatal=False) - view_count = str_to_int(self._search_regex( - r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</', - webpage, 'view count', fatal=False)) - - info.update({ - 'uploader': uploader, - 'view_count': view_count, - }) - - return info diff --git a/yt_dlp/extractor/fourzerostudio.py b/yt_dlp/extractor/fourzerostudio.py deleted file mode 100644 index c388a3a07..000000000 --- a/yt_dlp/extractor/fourzerostudio.py +++ /dev/null @@ -1,106 +0,0 @@ -from .common import InfoExtractor -from ..utils import traverse_obj, unified_timestamp - - -class FourZeroStudioArchiveIE(InfoExtractor): - _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive' - IE_NAME = '0000studio:archive' - _TESTS = [{ - 'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive', - 'info_dict': { - 'id': '1290f433-fce0-4909-a24a-5f7df09665dc', - 'title': 
'noteで『canape』様へのファンレターを執筆します。(数秘術その2)', - 'timestamp': 1653802534, - 'release_timestamp': 1653796604, - 'thumbnails': 'count:1', - 'comments': 'count:7', - 'uploader': '『中崎雄心』の執務室。', - 'uploader_id': 'mumeijiten', - } - }] - - def _real_extract(self, url): - video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') - webpage = self._download_webpage(url, video_id) - nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None) - - pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False) - uploader_internal_id = traverse_obj(nuxt_data, ( - 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False) - - formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4') - - return { - 'id': video_id, - 'title': pcb.get('title'), - 'age_limit': 18 if pcb.get('isAdult') else None, - 'timestamp': unified_timestamp(pcb.get('finishTime')), - 'release_timestamp': unified_timestamp(pcb.get('createdAt')), - 'thumbnails': [{ - 'url': pcb['thumbnailUrl'], - 'ext': 'png', - }] if pcb.get('thumbnailUrl') else None, - 'formats': formats, - 'subtitles': subs, - 'comments': [{ - 'author': c.get('username'), - 'author_id': c.get('postedUserId'), - 'author_thumbnail': c.get('userThumbnailUrl'), - 'id': c.get('id'), - 'text': c.get('body'), - 'timestamp': unified_timestamp(c.get('createdAt')), - 'like_count': c.get('likeCount'), - 'is_favorited': c.get('isLikedByOwner'), - 'author_is_uploader': c.get('postedUserId') == uploader_internal_id, - } for c in traverse_obj(nuxt_data, ( - 'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []], - 'uploader_id': uploader_id, - 'uploader': traverse_obj(nuxt_data, ( - 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False), - } - - -class FourZeroStudioClipIE(InfoExtractor): - _VALID_URL = 
r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)' - IE_NAME = '0000studio:clip' - _TESTS = [{ - 'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f', - 'info_dict': { - 'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f', - 'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!', - 'timestamp': 1652109105, - 'like_count': 1, - 'uploader': 'ソエジマケイタ', - 'uploader_id': 'soeji', - } - }] - - def _real_extract(self, url): - video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') - webpage = self._download_webpage(url, video_id) - nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None) - - clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False) - - info = next(( - m for m in self._parse_html5_media_entries(url, webpage, video_id) - if 'mp4' in traverse_obj(m, ('formats', ..., 'ext')) - ), None) - if not info: - self.report_warning('Failed to find a desired media element. 
Falling back to using NUXT data.') - info = { - 'formats': [{ - 'ext': 'mp4', - 'url': url, - } for url in clip_info.get('mediaFiles') or [] if url], - } - return { - **info, - 'id': video_id, - 'title': clip_info.get('clipComment'), - 'timestamp': unified_timestamp(clip_info.get('createdAt')), - 'like_count': clip_info.get('likeCount'), - 'uploader_id': uploader_id, - 'uploader': traverse_obj(nuxt_data, ( - 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False), - } diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py deleted file mode 100644 index f4f29c65d..000000000 --- a/yt_dlp/extractor/foxgay.py +++ /dev/null @@ -1,58 +0,0 @@ -import itertools - -from .common import InfoExtractor -from ..utils import ( - get_element_by_id, - int_or_none, - remove_end, -) - - -class FoxgayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' - _TEST = { - 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', - 'md5': '344558ccfea74d33b7adbce22e577f54', - 'info_dict': { - 'id': '2582', - 'ext': 'mp4', - 'title': 'Fuck Turkish-style', - 'description': 'md5:6ae2d9486921891efe89231ace13ffdf', - 'age_limit': 18, - 'thumbnail': r're:https?://.*\.jpg$', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = remove_end(self._html_extract_title(webpage), ' - Foxgay.com') - description = get_element_by_id('inf_tit', webpage) - - # The default user-agent with foxgay cookies leads to pages without videos - self.cookiejar.clear('.foxgay.com') - # Find the URL for the iFrame which contains the actual video. 
- iframe_url = self._html_search_regex( - r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage, - 'video frame', group='url') - iframe = self._download_webpage( - iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'}, - note='Downloading video frame') - video_data = self._parse_json(self._search_regex( - r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id) - - formats = [{ - 'url': source, - 'height': int_or_none(resolution), - } for source, resolution in zip( - video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))] - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'thumbnail': video_data.get('act_vid', {}).get('thumb'), - 'age_limit': 18, - } diff --git a/yt_dlp/extractor/fusion.py b/yt_dlp/extractor/fusion.py deleted file mode 100644 index 689422fca..000000000 --- a/yt_dlp/extractor/fusion.py +++ /dev/null @@ -1,81 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - determine_ext, - int_or_none, - mimetype2ext, - parse_iso8601, -) - - -class FusionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', - 'info_dict': { - 'id': '3145868', - 'ext': 'mp4', - 'title': 'U.S. 
and Panamanian forces work together to stop a vessel smuggling drugs', - 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', - 'duration': 140.0, - 'timestamp': 1442589635, - 'uploader': 'UNIVISON', - 'upload_date': '20150918', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Anvato'], - }, { - 'url': 'http://fusion.tv/video/201781', - 'only_matching': True, - }, { - 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - video = self._download_json( - 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id) - - info = { - 'id': video_id, - 'title': video['title'], - 'description': video.get('excerpt'), - 'timestamp': parse_iso8601(video.get('published')), - 'series': video.get('show'), - } - - formats = [] - src = video.get('src') or {} - for f_id, f in src.items(): - for q_id, q in f.items(): - q_url = q.get('url') - if not q_url: - continue - ext = determine_ext(q_url, mimetype2ext(q.get('type'))) - if ext == 'smil': - formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False)) - elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'): - formats.extend(self._extract_m3u8_formats( - q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'format_id': '-'.join([f_id, q_id]), - 'url': q_url, - 'width': int_or_none(q.get('width')), - 'height': int_or_none(q.get('height')), - 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')), - 'ext': 'mp4' if ext == 'm3u8' else ext, - 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', - }) - if formats: - info['formats'] = formats - else: - info.update({ - '_type': 'url', - 'url': 'anvato:uni:' + video['video_ids']['anvato'], - 'ie_key': 'Anvato', - }) - - return info diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 
1503e5146..606b4f5d1 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -374,46 +374,6 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - # ooyala video - { - 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', - 'md5': '166dd577b433b4d4ebfee10b0824d8ff', - 'info_dict': { - 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', - 'ext': 'mp4', - 'title': '2cc213299525360.mov', # that's what we get - 'duration': 238.231, - }, - 'add_ie': ['Ooyala'], - }, - { - # ooyala video embedded with http://player.ooyala.com/iframe.js - 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/', - 'info_dict': { - 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB', - 'ext': 'mp4', - 'title': '"Steve Jobs: Man in the Machine" trailer', - 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', - 'duration': 135.427, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'movie expired', - }, - # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js - { - 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/', - 'info_dict': { - 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2', - 'ext': 'mp4', - 'title': 'Steampunk Fest Comes to Honesdale', - 'duration': 43.276, - }, - 'params': { - 'skip_download': True, - } - }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -506,7 +466,8 @@ class GenericIE(InfoExtractor): 'title': 'Ужастики, русский трейлер (2015)', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 153, - } + }, + 'skip': 'Site dead', }, # XHamster embed { @@ -778,14 +739,16 @@ class GenericIE(InfoExtractor): 'playlist_mincount': 1, 'add_ie': ['Youtube'], }, - # Cinchcast embed + # Libsyn embed { 'url': 
'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', 'info_dict': { - 'id': '7141703', + 'id': '3793998', 'ext': 'mp3', 'upload_date': '20141126', - 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', + 'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing', + 'thumbnail': 'https://assets.libsyn.com/secure/item/3793998/?height=90&width=90', + 'duration': 3989.0, } }, # Cinerama player @@ -1567,16 +1530,6 @@ class GenericIE(InfoExtractor): 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', }, }, - { - # vzaar embed - 'url': 'http://help.vzaar.com/article/165-embedding-video', - 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4', - 'info_dict': { - 'id': '8707641', - 'ext': 'mp4', - 'title': 'Building A Business Online: Principal Chairs Q & A', - }, - }, { # multiple HTML5 videos on one page 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html', diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py deleted file mode 100644 index edc2e56e4..000000000 --- a/yt_dlp/extractor/gfycat.py +++ /dev/null @@ -1,145 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - int_or_none, - float_or_none, - qualities, - ExtractorError, -) - - -class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)' - _EMBED_REGEX = [rf'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>{_VALID_URL})'] - _TESTS = [{ - 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', - 'info_dict': { - 'id': 'DeadlyDecisiveGermanpinscher', - 'ext': 'mp4', - 'title': 'Ghost in the Shell', - 'timestamp': 1410656006, - 'upload_date': '20140914', - 'uploader': 'anonymous', - 'duration': 10.4, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 0, - 'uploader_id': 'anonymous', - 'description': '', - } - }, { - 'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa', - 'info_dict': { - 'id': 
'JauntyTimelyAmazontreeboa', - 'ext': 'mp4', - 'title': 'JauntyTimelyAmazontreeboa', - 'timestamp': 1411720126, - 'upload_date': '20140926', - 'uploader': 'anonymous', - 'duration': 3.52, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 0, - 'uploader_id': 'anonymous', - 'description': '', - } - }, { - 'url': 'https://gfycat.com/alienatedsolidgreathornedowl', - 'info_dict': { - 'id': 'alienatedsolidgreathornedowl', - 'ext': 'mp4', - 'upload_date': '20211226', - 'uploader_id': 'reactions', - 'timestamp': 1640536930, - 'like_count': int, - 'description': '', - 'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year', - 'categories': list, - 'age_limit': 0, - 'duration': 2.9583333333333335, - 'uploader': 'Reaction GIFs', - 'view_count': int, - } - }, { - 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish', - 'only_matching': True - }, { - 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull', - 'only_matching': True - }, { - 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball', - 'only_matching': True - }, { - 'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif', - 'only_matching': True - }, { - 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4', - 'only_matching': True - }, { - 'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa', - 'only_matching': True - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - gfy = self._download_json( - 'https://api.gfycat.com/v1/gfycats/%s' % video_id, - video_id, 'Downloading video info') - if 'error' in gfy: - raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True) - gfy = gfy['gfyItem'] - - title = gfy.get('title') or gfy['gfyName'] - description = gfy.get('description') - timestamp = int_or_none(gfy.get('createDate')) - uploader = gfy.get('userName') or gfy.get('username') - view_count = int_or_none(gfy.get('views')) - like_count = 
int_or_none(gfy.get('likes')) - dislike_count = int_or_none(gfy.get('dislikes')) - age_limit = 18 if gfy.get('nsfw') == '1' else 0 - - width = int_or_none(gfy.get('width')) - height = int_or_none(gfy.get('height')) - fps = int_or_none(gfy.get('frameRate')) - num_frames = int_or_none(gfy.get('numFrames')) - - duration = float_or_none(num_frames, fps) if num_frames and fps else None - - categories = gfy.get('tags') or gfy.get('extraLemmas') or [] - - FORMATS = ('gif', 'webm', 'mp4') - quality = qualities(FORMATS) - - formats = [] - for format_id in FORMATS: - video_url = gfy.get('%sUrl' % format_id) - if not video_url: - continue - filesize = int_or_none(gfy.get('%sSize' % format_id)) - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'width': width, - 'height': height, - 'fps': fps, - 'filesize': filesize, - 'quality': quality(format_id), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'uploader': gfy.get('userDisplayName') or uploader, - 'uploader_id': uploader, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'categories': categories, - 'age_limit': age_limit, - 'formats': formats, - } diff --git a/yt_dlp/extractor/groupon.py b/yt_dlp/extractor/groupon.py index 362d3ff83..c1cbda35f 100644 --- a/yt_dlp/extractor/groupon.py +++ b/yt_dlp/extractor/groupon.py @@ -31,7 +31,6 @@ class GrouponIE(InfoExtractor): } _PROVIDERS = { - 'ooyala': ('ooyala:%s', 'Ooyala'), 'youtube': ('%s', 'Youtube'), } diff --git a/yt_dlp/extractor/helsinki.py b/yt_dlp/extractor/helsinki.py deleted file mode 100644 index e518cae1a..000000000 --- a/yt_dlp/extractor/helsinki.py +++ /dev/null @@ -1,38 +0,0 @@ -from .common import InfoExtractor -from ..utils import js_to_json - - -class HelsinkiIE(InfoExtractor): - IE_DESC = 'helsinki.fi' - _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)' - _TEST = { - 'url': 
'http://video.helsinki.fi/Arkisto/flash.php?id=20258', - 'info_dict': { - 'id': '20258', - 'ext': 'mp4', - 'title': 'Tietotekniikkafoorumi-iltapäivä', - 'description': 'md5:f5c904224d43c133225130fe156a5ee0', - }, - 'params': { - 'skip_download': True, # RTMP - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - params = self._parse_json(self._html_search_regex( - r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);', - webpage, 'player code'), video_id, transform_source=js_to_json) - formats = [{ - 'url': s['file'], - 'ext': 'mp4', - } for s in params['sources']] - - return { - 'id': video_id, - 'title': self._og_search_title(webpage).replace('Video: ', ''), - 'description': self._og_search_description(webpage), - 'formats': formats, - } diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py deleted file mode 100644 index f0c689883..000000000 --- a/yt_dlp/extractor/hitbox.py +++ /dev/null @@ -1,209 +0,0 @@ -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - clean_html, - determine_ext, - float_or_none, - int_or_none, - parse_iso8601, -) - - -class HitboxIE(InfoExtractor): - IE_NAME = 'hitbox' - _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.hitbox.tv/video/203213', - 'info_dict': { - 'id': '203213', - 'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy', - 'alt_title': 'hitboxlive - Aug 9th #6', - 'description': '', - 'ext': 'mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 215.1666, - 'resolution': 'HD 720p', - 'uploader': 'hitboxlive', - 'view_count': int, - 'timestamp': 1407576133, - 'upload_date': '20140809', - 'categories': ['Live Show'], - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://www.smashcast.tv/hitboxlive/videos/203213', - 'only_matching': 
True, - }] - - def _extract_metadata(self, url, video_id): - thumb_base = 'https://edge.sf.hitbox.tv' - metadata = self._download_json( - '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON') - - date = 'media_live_since' - media_type = 'livestream' - if metadata.get('media_type') == 'video': - media_type = 'video' - date = 'media_date_added' - - video_meta = metadata.get(media_type, [])[0] - title = video_meta.get('media_status') - alt_title = video_meta.get('media_title') - description = clean_html( - video_meta.get('media_description') - or video_meta.get('media_description_md')) - duration = float_or_none(video_meta.get('media_duration')) - uploader = video_meta.get('media_user_name') - views = int_or_none(video_meta.get('media_views')) - timestamp = parse_iso8601(video_meta.get(date), ' ') - categories = [video_meta.get('category_name')] - thumbs = [{ - 'url': thumb_base + video_meta.get('media_thumbnail'), - 'width': 320, - 'height': 180 - }, { - 'url': thumb_base + video_meta.get('media_thumbnail_large'), - 'width': 768, - 'height': 432 - }] - - return { - 'id': video_id, - 'title': title, - 'alt_title': alt_title, - 'description': description, - 'ext': 'mp4', - 'thumbnails': thumbs, - 'duration': duration, - 'uploader': uploader, - 'view_count': views, - 'timestamp': timestamp, - 'categories': categories, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - player_config = self._download_json( - 'https://www.smashcast.tv/api/player/config/video/%s' % video_id, - video_id, 'Downloading video JSON') - - formats = [] - for video in player_config['clip']['bitrates']: - label = video.get('label') - if label == 'Auto': - continue - video_url = video.get('url') - if not video_url: - continue - bitrate = int_or_none(video.get('bitrate')) - if determine_ext(video_url) == 'm3u8': - if not video_url.startswith('http'): - continue - formats.append({ - 'url': video_url, - 'ext': 'mp4', - 'tbr': bitrate, - 'format_note': label, - 
'protocol': 'm3u8_native', - }) - else: - formats.append({ - 'url': video_url, - 'tbr': bitrate, - 'format_note': label, - }) - - metadata = self._extract_metadata( - 'https://www.smashcast.tv/api/media/video', video_id) - metadata['formats'] = formats - - return metadata - - -class HitboxLiveIE(HitboxIE): # XXX: Do not subclass from concrete IE - IE_NAME = 'hitbox:live' - _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'http://www.hitbox.tv/dimak', - 'info_dict': { - 'id': 'dimak', - 'ext': 'mp4', - 'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e', - 'timestamp': int, - 'upload_date': compat_str, - 'title': compat_str, - 'uploader': 'Dimak', - }, - 'params': { - # live - 'skip_download': True, - }, - }, { - 'url': 'https://www.smashcast.tv/dimak', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url) - - def _real_extract(self, url): - video_id = self._match_id(url) - - player_config = self._download_json( - 'https://www.smashcast.tv/api/player/config/live/%s' % video_id, - video_id) - - formats = [] - cdns = player_config.get('cdns') - servers = [] - for cdn in cdns: - # Subscribe URLs are not playable - if cdn.get('rtmpSubscribe') is True: - continue - base_url = cdn.get('netConnectionUrl') - host = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1) - if base_url not in servers: - servers.append(base_url) - for stream in cdn.get('bitrates'): - label = stream.get('label') - if label == 'Auto': - continue - stream_url = stream.get('url') - if not stream_url: - continue - bitrate = int_or_none(stream.get('bitrate')) - if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8': - if not stream_url.startswith('http'): - continue - formats.append({ - 'url': stream_url, - 'ext': 'mp4', - 'tbr': bitrate, - 'format_note': label, - 'rtmp_live': True, - }) - else: - formats.append({ - 'url': 
'%s/%s' % (base_url, stream_url), - 'ext': 'mp4', - 'tbr': bitrate, - 'rtmp_live': True, - 'format_note': host, - 'page_url': url, - 'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf', - }) - - metadata = self._extract_metadata( - 'https://www.smashcast.tv/api/media/live', video_id) - metadata['formats'] = formats - metadata['is_live'] = True - metadata['title'] = metadata.get('title') - - return metadata diff --git a/yt_dlp/extractor/howcast.py b/yt_dlp/extractor/howcast.py deleted file mode 100644 index 59cf80f1a..000000000 --- a/yt_dlp/extractor/howcast.py +++ /dev/null @@ -1,41 +0,0 @@ -from .common import InfoExtractor -from ..utils import parse_iso8601 - - -class HowcastIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', - 'md5': '7d45932269a288149483144f01b99789', - 'info_dict': { - 'id': '390161', - 'ext': 'mp4', - 'title': 'How to Tie a Square Knot Properly', - 'description': 'md5:dbe792e5f6f1489027027bf2eba188a3', - 'timestamp': 1276081287, - 'upload_date': '20100609', - 'duration': 56.823, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - embed_code = self._search_regex( - r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b', - webpage, 'ooyala embed code') - - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % embed_code, - 'id': video_id, - 'timestamp': parse_iso8601(self._html_search_meta( - 'article:published_time', webpage, 'timestamp')), - } diff --git a/yt_dlp/extractor/howstuffworks.py b/yt_dlp/extractor/howstuffworks.py deleted file mode 100644 index 238fc0b42..000000000 --- a/yt_dlp/extractor/howstuffworks.py +++ /dev/null @@ -1,86 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( 
- find_xpath_attr, - int_or_none, - js_to_json, - unescapeHTML, - determine_ext, -) - - -class HowStuffWorksIE(InfoExtractor): - _VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm' - _TESTS = [ - { - 'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm', - 'md5': '76646a5acc0c92bf7cd66751ca5db94d', - 'info_dict': { - 'id': '855410', - 'ext': 'mp4', - 'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web', - 'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb', - }, - }, - { - 'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - clip_js = self._search_regex( - r'(?s)var clip = ({.*?});', webpage, 'clip info') - clip_info = self._parse_json( - clip_js, display_id, transform_source=js_to_json) - - video_id = clip_info['content_id'] - formats = [] - m3u8_url = clip_info.get('m3u8') - if m3u8_url and determine_ext(m3u8_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', format_id='hls', fatal=True)) - flv_url = clip_info.get('flv_url') - if flv_url: - formats.append({ - 'url': flv_url, - 'format_id': 'flv', - }) - for video in clip_info.get('mp4', []): - formats.append({ - 'url': video['src'], - 'format_id': 'mp4-%s' % video['bitrate'], - 'vbr': int_or_none(video['bitrate'].rstrip('k')), - }) - - if not formats: - smil = self._download_xml( - 'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id, - video_id, 'Downloading video SMIL') - - http_base = find_xpath_attr( - smil, - './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), - 'name', - 'httpBase').get('content') - - URL_SUFFIX = 
'?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A' - - for video in smil.findall( - './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): - vbr = int_or_none(video.attrib['system-bitrate'], scale=1000) - formats.append({ - 'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX), - 'format_id': '%dk' % vbr, - 'vbr': vbr, - }) - - return { - 'id': '%s' % video_id, - 'display_id': display_id, - 'title': unescapeHTML(clip_info['clip_title']), - 'description': unescapeHTML(clip_info.get('caption')), - 'thumbnail': clip_info.get('video_still_url'), - 'duration': int_or_none(clip_info.get('duration')), - 'formats': formats, - } diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py deleted file mode 100644 index b50da420c..000000000 --- a/yt_dlp/extractor/keezmovies.py +++ /dev/null @@ -1,125 +0,0 @@ -import re - -from .common import InfoExtractor -from ..aes import aes_decrypt_text -from ..compat import compat_urllib_parse_unquote -from ..utils import ( - determine_ext, - format_field, - int_or_none, - str_to_int, - strip_or_none, - url_or_none, -) - - -class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681', - 'md5': '2ac69cdb882055f71d82db4311732a1a', - 'info_dict': { - 'id': '18070681', - 'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money', - 'ext': 'mp4', - 'title': 'Arab wife want it so bad I see she thirsty and has tiny money.', - 'thumbnail': None, - 'view_count': int, - 'age_limit': 18, - } - }, { - 'url': 'http://www.keezmovies.com/video/18070681', - 'only_matching': True, - }] - - def _extract_info(self, url, fatal=True): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = (mobj.group('display_id') - if 'display_id' in mobj.groupdict() - else None) or 
mobj.group('id') - - webpage = self._download_webpage( - url, display_id, headers={'Cookie': 'age_verified=1'}) - - formats = [] - format_urls = set() - - title = None - thumbnail = None - duration = None - encrypted = False - - def extract_format(format_url, height=None): - format_url = url_or_none(format_url) - if not format_url or not format_url.startswith(('http', '//')): - return - if format_url in format_urls: - return - format_urls.add(format_url) - tbr = int_or_none(self._search_regex( - r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None)) - if not height: - height = int_or_none(self._search_regex( - r'[/_](\d+)[pP][/_]', format_url, 'height', default=None)) - if encrypted: - format_url = aes_decrypt_text( - video_url, title, 32).decode('utf-8') - formats.append({ - 'url': format_url, - 'format_id': format_field(height, None, '%dp'), - 'height': height, - 'tbr': tbr, - }) - - flashvars = self._parse_json( - self._search_regex( - r'flashvars\s*=\s*({.+?});', webpage, - 'flashvars', default='{}'), - display_id, fatal=False) - - if flashvars: - title = flashvars.get('video_title') - thumbnail = flashvars.get('image_url') - duration = int_or_none(flashvars.get('video_duration')) - encrypted = flashvars.get('encrypted') is True - for key, value in flashvars.items(): - mobj = re.search(r'quality_(\d+)[pP]', key) - if mobj: - extract_format(value, int(mobj.group(1))) - video_url = flashvars.get('video_url') - if video_url and determine_ext(video_url, None): - extract_format(video_url) - - video_url = self._html_search_regex( - r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1', - webpage, 'video url', default=None, group='url') - if video_url: - extract_format(compat_urllib_parse_unquote(video_url)) - - if not formats: - if 'title="This video is no longer available"' in webpage: - self.raise_no_formats( - 'Video %s is no longer available' % video_id, expected=True) - - if not title: - title = self._html_search_regex( - r'<h1[^>]*>([^<]+)', webpage, 
'title') - - return webpage, { - 'id': video_id, - 'display_id': display_id, - 'title': strip_or_none(title), - 'thumbnail': thumbnail, - 'duration': duration, - 'age_limit': 18, - 'formats': formats, - } - - def _real_extract(self, url): - webpage, info = self._extract_info(url, fatal=False) - if not info['formats']: - return self.url_result(url, 'Generic') - info['view_count'] = str_to_int(self._search_regex( - r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False)) - return info diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py index df1386fb8..a225d0a0d 100644 --- a/yt_dlp/extractor/kinja.py +++ b/yt_dlp/extractor/kinja.py @@ -41,7 +41,6 @@ class KinjaEmbedIE(InfoExtractor): kinjavideo| mcp| megaphone| - ooyala| soundcloud(?:-playlist)?| tumblr-post| twitch-stream| @@ -61,9 +60,6 @@ class KinjaEmbedIE(InfoExtractor): }, { 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075', 'only_matching': True, - }, { - 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5', - 'only_matching': True, }, { 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047', 'only_matching': True, @@ -103,7 +99,6 @@ class KinjaEmbedIE(InfoExtractor): 'jwplayer-video': _JWPLATFORM_PROVIDER, 'jwp-video': _JWPLATFORM_PROVIDER, 'megaphone': ('player.megaphone.fm/', 'Generic'), - 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'), 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'), 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'), 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'), @@ -129,8 +124,6 @@ def _real_extract(self, url): video_id, playlist_id = video_id.split('/') result_url = provider[0] % (video_id, playlist_id) else: - if video_type == 'ooyala': - video_id = video_id.split('/')[0] result_url = provider[0] + video_id return self.url_result('http://' + result_url, provider[1]) diff --git 
a/yt_dlp/extractor/laola1tv.py b/yt_dlp/extractor/laola1tv.py deleted file mode 100644 index 416dd7eb4..000000000 --- a/yt_dlp/extractor/laola1tv.py +++ /dev/null @@ -1,261 +0,0 @@ -import json -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unified_strdate, - urlencode_postdata, - xpath_element, - xpath_text, - update_url_query, - js_to_json, -) - - -class Laola1TvEmbedIE(InfoExtractor): - IE_NAME = 'laola1tv:embed' - _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)' - _TESTS = [{ - # flashvars.premium = "false"; - 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024', - 'info_dict': { - 'id': '708065', - 'ext': 'mp4', - 'title': 'MA Long CHN - FAN Zhendong CHN', - 'uploader': 'ITTF - International Table Tennis Federation', - 'upload_date': '20161211', - }, - }] - - def _extract_token_url(self, stream_access_url, video_id, data): - return self._download_json( - self._proto_relative_url(stream_access_url, 'https:'), video_id, - headers={ - 'Content-Type': 'application/json', - }, data=json.dumps(data).encode())['data']['stream-access'][0] - - def _extract_formats(self, token_url, video_id): - token_doc = self._download_xml( - token_url, video_id, 'Downloading token', - headers=self.geo_verification_headers()) - - token_attrib = xpath_element(token_doc, './/token').attrib - - if token_attrib['status'] != '0': - raise ExtractorError( - 'Token error: %s' % token_attrib['comment'], expected=True) - - formats = self._extract_akamai_formats( - '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']), - video_id) - return formats - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - flash_vars = self._search_regex( - r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars') - - def get_flashvar(x, *args, **kwargs): - flash_var = self._search_regex( - r'%s\s*:\s*"([^"]+)"' % x, - 
flash_vars, x, default=None) - if not flash_var: - flash_var = self._search_regex([ - r'flashvars\.%s\s*=\s*"([^"]+)"' % x, - r'%s\s*=\s*"([^"]+)"' % x], - webpage, x, *args, **kwargs) - return flash_var - - hd_doc = self._download_xml( - 'http://www.laola1.tv/server/hd_video.php', video_id, query={ - 'play': get_flashvar('streamid'), - 'partner': get_flashvar('partnerid'), - 'portal': get_flashvar('portalid'), - 'lang': get_flashvar('sprache'), - 'v5ident': '', - }) - - _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) - title = _v('title', fatal=True) - - token_url = None - premium = get_flashvar('premium', default=None) - if premium: - token_url = update_url_query( - _v('url', fatal=True), { - 'timestamp': get_flashvar('timestamp'), - 'auth': get_flashvar('auth'), - }) - else: - data_abo = urlencode_postdata( - dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) - stream_access_url = update_url_query( - 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', { - 'videoId': _v('id'), - 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), - 'label': _v('label'), - 'area': _v('area'), - }) - token_url = self._extract_token_url(stream_access_url, video_id, data_abo) - - formats = self._extract_formats(token_url, video_id) - - categories_str = _v('meta_sports') - categories = categories_str.split(',') if categories_str else [] - is_live = _v('islive') == 'true' - - return { - 'id': video_id, - 'title': title, - 'upload_date': unified_strdate(_v('time_date')), - 'uploader': _v('meta_organisation'), - 'categories': categories, - 'is_live': is_live, - 'formats': formats, - } - - -class Laola1TvBaseIE(Laola1TvEmbedIE): # XXX: Do not subclass from concrete IE - def _extract_video(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - if 'Dieser Livestream ist bereits beendet.' 
in webpage: - raise ExtractorError('This live stream has already finished.', expected=True) - - conf = self._parse_json(self._search_regex( - r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), - display_id, - transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s))) - video_id = conf['videoid'] - - config = self._download_json(conf['configUrl'], video_id, query={ - 'videoid': video_id, - 'partnerid': conf['partnerid'], - 'language': conf.get('language', ''), - 'portal': conf.get('portalid', ''), - }) - error = config.get('error') - if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) - - video_data = config['video'] - title = video_data['title'] - is_live = video_data.get('isLivestream') and video_data.get('isLive') - meta = video_data.get('metaInformation') - sports = meta.get('sports') - categories = sports.split(',') if sports else [] - - token_url = self._extract_token_url( - video_data['streamAccess'], video_id, - video_data['abo']['required']) - - formats = self._extract_formats(token_url, video_id) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('image'), - 'categories': categories, - 'formats': formats, - 'is_live': is_live, - } - - -class Laola1TvIE(Laola1TvBaseIE): - IE_NAME = 'laola1tv' - _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', - 'info_dict': { - 'id': '227883', - 'display_id': 'straubing-tigers-koelner-haie', - 'ext': 'flv', - 'title': 'Straubing Tigers - Kölner Haie', - 'upload_date': '20140912', - 'is_live': False, - 'categories': ['Eishockey'], - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie', - 'info_dict': { - 'id': '464602', - 'display_id': 'straubing-tigers-koelner-haie', - 
'ext': 'flv', - 'title': 'Straubing Tigers - Kölner Haie', - 'upload_date': '20160129', - 'is_live': False, - 'categories': ['Eishockey'], - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde', - 'info_dict': { - 'id': '487850', - 'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde', - 'ext': 'flv', - 'title': 'Belogorie BELGOROD - TRENTINO Diatec', - 'upload_date': '20160322', - 'uploader': 'CEV - Europäischer Volleyball Verband', - 'is_live': True, - 'categories': ['Volleyball'], - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'This live stream has already finished.', - }] - - def _real_extract(self, url): - return self._extract_video(url) - - -class EHFTVIE(Laola1TvBaseIE): - IE_NAME = 'ehftv' - _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761', - 'info_dict': { - 'id': '1166761', - 'display_id': 'paris-saint-germain-handball-pge-vive-kielce', - 'ext': 'mp4', - 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce', - 'is_live': False, - 'categories': ['Handball'], - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - return self._extract_video(url) - - -class ITTFIE(InfoExtractor): - _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)' - _TEST = { - 'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802', - 'only_matching': True, - } - - def _real_extract(self, url): - return self.url_result( - update_url_query('https://www.laola1.tv/titanplayer.php', { - 'videoid': self._match_id(url), - 'type': 'V', - 'lang': 'en', - 'portal': 'int', - 'customer': 1024, - }), Laola1TvEmbedIE.ie_key()) diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py deleted file mode 100644 index 0b1644293..000000000 --- 
a/yt_dlp/extractor/linuxacademy.py +++ /dev/null @@ -1,238 +0,0 @@ -import json -import random - -from .common import InfoExtractor -from ..compat import compat_b64decode, compat_str -from ..networking.exceptions import HTTPError -from ..utils import ( - clean_html, - ExtractorError, - js_to_json, - parse_duration, - try_get, - unified_timestamp, - urlencode_postdata, - urljoin, -) - - -class LinuxAcademyIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?linuxacademy\.com/cp/ - (?: - courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)| - modules/view/id/(?P<course_id>\d+) - ) - ''' - _TESTS = [{ - 'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675', - 'info_dict': { - 'id': '7971-2', - 'ext': 'mp4', - 'title': 'What Is Data Science', - 'description': 'md5:c574a3c20607144fb36cb65bdde76c99', - 'timestamp': int, # The timestamp and upload date changes - 'upload_date': r're:\d+', - 'duration': 304, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Requires Linux Academy account credentials', - }, { - 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2', - 'only_matching': True, - }, { - 'url': 'https://linuxacademy.com/cp/modules/view/id/154', - 'info_dict': { - 'id': '154', - 'title': 'AWS Certified Cloud Practitioner', - 'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c', - 'duration': 28835, - }, - 'playlist_count': 41, - 'skip': 'Requires Linux Academy account credentials', - }, { - 'url': 'https://linuxacademy.com/cp/modules/view/id/39', - 'info_dict': { - 'id': '39', - 'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)', - 'description': 'md5:0f1d3369e90c3fb14a79813b863c902f', - 'duration': 89280, - }, - 'playlist_count': 73, - 'skip': 'Requires Linux Academy account credentials', - }] - - _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize' - _ORIGIN_URL = 'https://linuxacademy.com' - _CLIENT_ID = 
'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx' - _NETRC_MACHINE = 'linuxacademy' - - def _perform_login(self, username, password): - def random_string(): - return ''.join(random.choices( - '0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~', k=32)) - - webpage, urlh = self._download_webpage_handle( - self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ - 'client_id': self._CLIENT_ID, - 'response_type': 'token id_token', - 'response_mode': 'web_message', - 'redirect_uri': self._ORIGIN_URL, - 'scope': 'openid email user_impersonation profile', - 'audience': self._ORIGIN_URL, - 'state': random_string(), - 'nonce': random_string(), - }) - - login_data = self._parse_json( - self._search_regex( - r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, - 'login info', group='value'), None, - transform_source=lambda x: compat_b64decode(x).decode('utf-8') - )['extraParams'] - - login_data.update({ - 'client_id': self._CLIENT_ID, - 'redirect_uri': self._ORIGIN_URL, - 'tenant': 'lacausers', - 'connection': 'Username-Password-ACG-Proxy', - 'username': username, - 'password': password, - 'sso': 'true', - }) - - login_state_url = urlh.url - - try: - login_page = self._download_webpage( - 'https://login.linuxacademy.com/usernamepassword/login', None, - 'Downloading login page', data=json.dumps(login_data).encode(), - headers={ - 'Content-Type': 'application/json', - 'Origin': 'https://login.linuxacademy.com', - 'Referer': login_state_url, - }) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - error = self._parse_json(e.cause.response.read(), None) - message = error.get('description') or error['code'] - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, message), expected=True) - raise - - callback_page, urlh = self._download_webpage_handle( - 'https://login.linuxacademy.com/login/callback', None, - 'Downloading callback page', - data=urlencode_postdata(self._hidden_inputs(login_page)), - headers={ - 'Content-Type': 
'application/x-www-form-urlencoded', - 'Origin': 'https://login.linuxacademy.com', - 'Referer': login_state_url, - }) - - access_token = self._search_regex( - r'access_token=([^=&]+)', urlh.url, - 'access token', default=None) - if not access_token: - access_token = self._parse_json( - self._search_regex( - r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page, - 'authorization response'), None, - transform_source=js_to_json)['response']['access_token'] - - self._download_webpage( - 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' - % access_token, None, 'Downloading token validation page') - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id') - item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id) - - webpage = self._download_webpage(url, item_id) - - # course path - if course_id: - module = self._parse_json( - self._search_regex( - r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'), - item_id) - entries = [] - chapter_number = None - chapter = None - chapter_id = None - for item in module['items']: - if not isinstance(item, dict): - continue - - def type_field(key): - return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower() - type_fields = (type_field('name'), type_field('slug')) - # Move to next module section - if 'section' in type_fields: - chapter = item.get('course_name') - chapter_id = item.get('course_module') - chapter_number = 1 if not chapter_number else chapter_number + 1 - continue - # Skip non-lessons - if 'lesson' not in type_fields: - continue - lesson_url = urljoin(url, item.get('url')) - if not lesson_url: - continue - title = item.get('title') or item.get('lesson_name') - description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text')) - entries.append({ - '_type': 'url_transparent', - 'url': lesson_url, - 'ie_key': 
LinuxAcademyIE.ie_key(), - 'title': title, - 'description': description, - 'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')), - 'duration': parse_duration(item.get('duration')), - 'chapter': chapter, - 'chapter_id': chapter_id, - 'chapter_number': chapter_number, - }) - return { - '_type': 'playlist', - 'entries': entries, - 'id': course_id, - 'title': module.get('title'), - 'description': module.get('md_desc') or clean_html(module.get('desc')), - 'duration': parse_duration(module.get('duration')), - } - - # single video path - m3u8_url = self._parse_json( - self._search_regex( - r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'), - item_id)[0]['file'] - formats = self._extract_m3u8_formats( - m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - info = { - 'id': item_id, - 'formats': formats, - } - lesson = self._parse_json( - self._search_regex( - (r'window\.lesson\s*=\s*({.+?})\s*;', - r'player\.lesson\s*=\s*({.+?})\s*;'), - webpage, 'lesson', default='{}'), item_id, fatal=False) - if lesson: - info.update({ - 'title': lesson.get('lesson_name'), - 'description': lesson.get('md_desc') or clean_html(lesson.get('desc')), - 'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')), - 'duration': parse_duration(lesson.get('duration')), - }) - if not info.get('title'): - info['title'] = self._search_regex( - (r'>Lecture\s*:\s*(?P<value>[^<]+)', - r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, - 'title', group='value') - return info diff --git a/yt_dlp/extractor/m6.py b/yt_dlp/extractor/m6.py deleted file mode 100644 index 9dcc60164..000000000 --- a/yt_dlp/extractor/m6.py +++ /dev/null @@ -1,22 +0,0 @@ -from .common import InfoExtractor - - -class M6IE(InfoExtractor): - IE_NAME = 'm6' - _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html' - - _TEST = { - 'url': 
'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html', - 'md5': '242994a87de2c316891428e0176bcb77', - 'info_dict': { - 'id': '11323908', - 'ext': 'mp4', - 'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »', - 'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2', - 'duration': 100, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result('6play:%s' % video_id, 'SixPlay', video_id) diff --git a/yt_dlp/extractor/meta.py b/yt_dlp/extractor/meta.py deleted file mode 100644 index 7c11e6017..000000000 --- a/yt_dlp/extractor/meta.py +++ /dev/null @@ -1,70 +0,0 @@ -from .common import InfoExtractor -from .pladform import PladformIE -from ..utils import ( - unescapeHTML, - int_or_none, - ExtractorError, -) - - -class METAIE(InfoExtractor): - _VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://video.meta.ua/5502115.video', - 'md5': '71b6f3ee274bef16f1ab410f7f56b476', - 'info_dict': { - 'id': '5502115', - 'ext': 'mp4', - 'title': 'Sony Xperia Z camera test [HQ]', - 'description': 'Xperia Z shoots video in FullHD HDR.', - 'uploader_id': 'nomobile', - 'uploader': 'CHЁZA.TV', - 'upload_date': '20130211', - }, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://video.meta.ua/iframe/5502115', - 'only_matching': True, - }, { - # pladform embed - 'url': 'http://video.meta.ua/7121015.video', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - st_html5 = self._search_regex( - r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None) - - if st_html5: - # uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js - json_str = '' - for i in range(0, len(st_html5), 3): - json_str += '&#x0%s;' % st_html5[i:i + 3] - uppod_data =
self._parse_json(unescapeHTML(json_str), video_id) - error = uppod_data.get('customnotfound') - if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) - - video_url = uppod_data['file'] - info = { - 'id': video_id, - 'url': video_url, - 'title': uppod_data.get('comment') or self._og_search_title(webpage), - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage), - 'duration': int_or_none(self._og_search_property( - 'video:duration', webpage, default=None)), - } - if 'youtube.com/' in video_url: - info.update({ - '_type': 'url_transparent', - 'ie_key': 'Youtube', - }) - return info - - pladform_url = PladformIE._extract_url(webpage) - if pladform_url: - return self.url_result(pladform_url) diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py deleted file mode 100644 index d7f5def0e..000000000 --- a/yt_dlp/extractor/metacafe.py +++ /dev/null @@ -1,281 +0,0 @@ -import json -import re -import urllib.parse - -from .common import InfoExtractor -from ..compat import compat_parse_qs, compat_urllib_parse_unquote -from ..utils import ( - ExtractorError, - determine_ext, - get_element_by_attribute, - int_or_none, - mimetype2ext, -) - - -class MetacafeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<id>[^/]+)/(?P<display_id>[^/?#]+)' - _DISCLAIMER = 'http://www.metacafe.com/family_filter/' - _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' - IE_NAME = 'metacafe' - _TESTS = [ - # Youtube video - { - 'add_ie': ['Youtube'], - 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/', - 'info_dict': { - 'id': '_aUehQsCQtM', - 'ext': 'mp4', - 'upload_date': '20090102', - 'title': 'The Electric Company | "Short I" | PBS KIDS GO!', - 'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8', - 'uploader': 'PBS', - 'uploader_id': 'PBS' - } - }, 
- # Normal metacafe video - { - 'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/', - 'md5': '6e0bca200eaad2552e6915ed6fd4d9ad', - 'info_dict': { - 'id': '11121940', - 'ext': 'mp4', - 'title': 'News: Stuff You Won\'t Do with Your PlayStation 4', - 'uploader': 'ign', - 'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.', - }, - 'skip': 'Page is temporarily unavailable.', - }, - # metacafe video with family filter - { - 'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/', - 'md5': 'b06082c5079bbdcde677a6291fbdf376', - 'info_dict': { - 'id': '2155630', - 'ext': 'mp4', - 'title': 'Adult Art By David Hart 156', - 'uploader': '63346', - 'description': 'md5:9afac8fc885252201ad14563694040fc', - }, - 'params': { - 'skip_download': True, - }, - }, - # AnyClip video - { - 'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/', - 'info_dict': { - 'id': 'an-dVVXnuY7Jh77J', - 'ext': 'mp4', - 'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3', - 'uploader': 'AnyClip', - 'description': 'md5:cbef0460d31e3807f6feb4e7a5952e5b', - }, - }, - # age-restricted video - { - 'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/', - 'md5': '98dde7c1a35d02178e8ab7560fe8bd09', - 'info_dict': { - 'id': '5186653', - 'ext': 'mp4', - 'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.', - 'uploader': 'Dwayne Pipe', - 'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b', - 'age_limit': 18, - }, - }, - # cbs video - { - 'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/', - 'info_dict': { - 'id': '8VD4r_Zws8VP', - 'ext': 'flv', - 'title': 'Open: This is Face the Nation, February 9', - 'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476', - 'duration': 96, - 'uploader': 'CBSI-NEW', - 
'upload_date': '20140209', - 'timestamp': 1391959800, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, - # Movieclips.com video - { - 'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/', - 'info_dict': { - 'id': 'mv-Wy7ZU', - 'ext': 'mp4', - 'title': 'My Week with Marilyn - Do You Love Me?', - 'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.', - 'uploader': 'movie_trailers', - 'duration': 176, - }, - 'params': { - 'skip_download': 'requires rtmpdump', - } - } - ] - - def report_disclaimer(self): - self.to_screen('Retrieving disclaimer') - - def _real_extract(self, url): - # Extract id and simplified title from URL - video_id, display_id = self._match_valid_url(url).groups() - - # the video may come from an external site - m_external = re.match(r'^(\w{2})-(.*)$', video_id) - if m_external is not None: - prefix, ext_id = m_external.groups() - # Check if video comes from YouTube - if prefix == 'yt': - return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube') - # CBS videos use theplatform.com - if prefix == 'cb': - return self.url_result('theplatform:%s' % ext_id, 'ThePlatform') - - headers = { - # Disable family filter - 'Cookie': 'user=%s; ' % urllib.parse.quote(json.dumps({'ffilter': False})) - } - - # AnyClip videos require the flashversion cookie so that we get the link - # to the mp4 file - if video_id.startswith('an-'): - headers['Cookie'] += 'flashVersion=0; ' - - # Retrieve video webpage to extract further information - webpage = self._download_webpage(url, video_id, headers=headers) - - error = get_element_by_attribute( - 'class', 'notfound-page-title', webpage) - if error: - raise ExtractorError(error, expected=True) - - video_title = self._html_search_meta( - ['og:title', 'twitter:title'], webpage, 'title', default=None) or 
self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title') - - # Extract URL, uploader and title from webpage - self.report_extraction(video_id) - video_url = None - mobj = re.search(r'(?m)&(?:media|video)URL=([^&]+)', webpage) - if mobj is not None: - mediaURL = compat_urllib_parse_unquote(mobj.group(1)) - video_ext = determine_ext(mediaURL) - - # Extract gdaKey if available - mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - if mobj is None: - video_url = mediaURL - else: - gdaKey = mobj.group(1) - video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - if video_url is None: - mobj = re.search(r'<video src="([^"]+)"', webpage) - if mobj: - video_url = mobj.group(1) - video_ext = 'mp4' - if video_url is None: - flashvars = self._search_regex( - r' name="flashvars" value="(.*?)"', webpage, 'flashvars', - default=None) - if flashvars: - vardict = compat_parse_qs(flashvars) - if 'mediaData' not in vardict: - raise ExtractorError('Unable to extract media URL') - mobj = re.search( - r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0]) - if mobj is None: - raise ExtractorError('Unable to extract media URL') - mediaURL = mobj.group('mediaURL').replace('\\/', '/') - video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key')) - video_ext = determine_ext(video_url) - if video_url is None: - player_url = self._search_regex( - r"swfobject\.embedSWF\('([^']+)'", - webpage, 'config URL', default=None) - if player_url: - config_url = self._search_regex( - r'config=(.+)$', player_url, 'config URL') - config_doc = self._download_xml( - config_url, video_id, - note='Downloading video config') - smil_url = config_doc.find('.//properties').attrib['smil_file'] - smil_doc = self._download_xml( - smil_url, video_id, - note='Downloading SMIL document') - base_url = smil_doc.find('./head/meta').attrib['base'] - video_url = [] - for vn in smil_doc.findall('.//video'): - br = int(vn.attrib['system-bitrate']) - play_path = vn.attrib['src'] - video_url.append({ - 
'format_id': 'smil-%d' % br, - 'url': base_url, - 'play_path': play_path, - 'page_url': url, - 'player_url': player_url, - 'ext': play_path.partition(':')[0], - }) - if video_url is None: - flashvars = self._parse_json(self._search_regex( - r'flashvars\s*=\s*({.*});', webpage, 'flashvars', - default=None), video_id, fatal=False) - if flashvars: - video_url = [] - for source in flashvars.get('sources'): - source_url = source.get('src') - if not source_url: - continue - ext = mimetype2ext(source.get('type')) or determine_ext(source_url) - if ext == 'm3u8': - video_url.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - video_url.append({ - 'url': source_url, - 'ext': ext, - }) - - if video_url is None: - raise ExtractorError('Unsupported video type') - - description = self._html_search_meta( - ['og:description', 'twitter:description', 'description'], - webpage, 'title', fatal=False) - thumbnail = self._html_search_meta( - ['og:image', 'twitter:image'], webpage, 'title', fatal=False) - video_uploader = self._html_search_regex( - r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', - webpage, 'uploader nickname', fatal=False) - duration = int_or_none( - self._html_search_meta('video:duration', webpage, default=None)) - age_limit = ( - 18 - if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage) - else 0) - - if isinstance(video_url, list): - formats = video_url - else: - formats = [{ - 'url': video_url, - 'ext': video_ext, - }] - - return { - 'id': video_id, - 'display_id': display_id, - 'description': description, - 'uploader': video_uploader, - 'title': video_title, - 'thumbnail': thumbnail, - 'age_limit': age_limit, - 'formats': formats, - 'duration': duration, - } diff --git a/yt_dlp/extractor/mgoon.py b/yt_dlp/extractor/mgoon.py deleted file mode 100644 index 2388a7192..000000000 --- a/yt_dlp/extractor/mgoon.py +++ /dev/null @@ -1,81 +0,0 @@ 
-from .common import InfoExtractor -from ..utils import ( - ExtractorError, - qualities, - unified_strdate, -) - - -class MgoonIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)| - video\.mgoon\.com)/(?P<id>[0-9]+)''' - _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}' - _TESTS = [ - { - 'url': 'http://m.mgoon.com/ch/hi6618/v/5582148', - 'md5': 'dd46bb66ab35cf6d51cc812fd82da79d', - 'info_dict': { - 'id': '5582148', - 'uploader_id': 'hi6618', - 'duration': 240.419, - 'upload_date': '20131220', - 'ext': 'mp4', - 'title': 'md5:543aa4c27a4931d371c3f433e8cebebc', - 'thumbnail': r're:^https?://.*\.jpg$', - } - }, - { - 'url': 'http://www.mgoon.com/play/view/5582148', - 'only_matching': True, - }, - { - 'url': 'http://video.mgoon.com/5582148', - 'only_matching': True, - }, - ] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - data = self._download_json(self._API_URL.format(video_id), video_id) - - if data.get('errorInfo', {}).get('code') != 'NONE': - raise ExtractorError('%s encountered an error: %s' % ( - self.IE_NAME, data['errorInfo']['message']), expected=True) - - v_info = data['videoInfo'] - title = v_info.get('v_title') - thumbnail = v_info.get('v_thumbnail') - duration = v_info.get('v_duration') - upload_date = unified_strdate(v_info.get('v_reg_date')) - uploader_id = data.get('userInfo', {}).get('u_alias') - if duration: - duration /= 1000.0 - - age_limit = None - if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT': - age_limit = 18 - - formats = [] - get_quality = qualities(['360p', '480p', '720p', '1080p']) - for fmt in data['videoFiles']: - formats.append({ - 'format_id': fmt['label'], - 'quality': get_quality(fmt['label']), - 'url': fmt['url'], - 'ext': fmt['format'], - - }) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'duration': duration, - 'upload_date': upload_date, - 
'uploader_id': uploader_id, - 'age_limit': age_limit, - } diff --git a/yt_dlp/extractor/miomio.py b/yt_dlp/extractor/miomio.py deleted file mode 100644 index 8df8cba19..000000000 --- a/yt_dlp/extractor/miomio.py +++ /dev/null @@ -1,134 +0,0 @@ -import random - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..networking import Request -from ..utils import ExtractorError, int_or_none, xpath_text - - -class MioMioIE(InfoExtractor): - IE_NAME = 'miomio.tv' - _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)' - _TESTS = [{ - # "type=video" in flashvars - 'url': 'http://www.miomio.tv/watch/cc88912/', - 'info_dict': { - 'id': '88912', - 'ext': 'flv', - 'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕', - 'duration': 5923, - }, - 'skip': 'Unable to load videos', - }, { - 'url': 'http://www.miomio.tv/watch/cc184024/', - 'info_dict': { - 'id': '43729', - 'title': '《动漫同人插画绘制》', - }, - 'playlist_mincount': 86, - 'skip': 'Unable to load videos', - }, { - 'url': 'http://www.miomio.tv/watch/cc173113/', - 'info_dict': { - 'id': '173113', - 'title': 'The New Macbook 2015 上手试玩与简评' - }, - 'playlist_mincount': 2, - 'skip': 'Unable to load videos', - }, { - # new 'h5' player - 'url': 'http://www.miomio.tv/watch/cc273997/', - 'md5': '0b27a4b4495055d826813f8c3a6b2070', - 'info_dict': { - 'id': '273997', - 'ext': 'mp4', - 'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31', - }, - 'skip': 'Unable to load videos', - }] - - def _extract_mioplayer(self, webpage, video_id, title, http_headers): - xml_config = self._search_regex( - r'flashvars="type=(?:sina|video)&(.+?)&', - webpage, 'xml config') - - # skipping the following page causes lags and eventually connection drop-outs - self._request_webpage( - 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), - video_id) - - vid_config_request = Request( - 
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config), - headers=http_headers) - - # the following xml contains the actual configuration information on the video file(s) - vid_config = self._download_xml(vid_config_request, video_id) - - if not int_or_none(xpath_text(vid_config, 'timelength')): - raise ExtractorError('Unable to load videos!', expected=True) - - entries = [] - for f in vid_config.findall('./durl'): - segment_url = xpath_text(f, 'url', 'video url') - if not segment_url: - continue - order = xpath_text(f, 'order', 'order') - segment_id = video_id - segment_title = title - if order: - segment_id += '-%s' % order - segment_title += ' part %s' % order - entries.append({ - 'id': segment_id, - 'url': segment_url, - 'title': segment_title, - 'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000), - 'http_headers': http_headers, - }) - - return entries - - def _download_chinese_webpage(self, *args, **kwargs): - # Requests with English locales return garbage - headers = { - 'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3', - } - kwargs.setdefault('headers', {}).update(headers) - return self._download_webpage(*args, **kwargs) - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_chinese_webpage( - url, video_id) - - title = self._html_search_meta( - 'description', webpage, 'title', fatal=True) - - mioplayer_path = self._search_regex( - r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path') - - if '_h5' in mioplayer_path: - player_url = compat_urlparse.urljoin(url, mioplayer_path) - player_webpage = self._download_chinese_webpage( - player_url, video_id, - note='Downloading player webpage', headers={'Referer': url}) - entries = self._parse_html5_media_entries(player_url, player_webpage, video_id) - http_headers = {'Referer': player_url} - else: - http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} - entries = self._extract_mioplayer(webpage, video_id, title, 
http_headers) - - if len(entries) == 1: - segment = entries[0] - segment['id'] = video_id - segment['title'] = title - segment['http_headers'] = http_headers - return segment - - return { - '_type': 'multi_video', - 'id': video_id, - 'entries': entries, - 'title': title, - 'http_headers': http_headers, - } diff --git a/yt_dlp/extractor/mnet.py b/yt_dlp/extractor/mnet.py deleted file mode 100644 index 98bab2e10..000000000 --- a/yt_dlp/extractor/mnet.py +++ /dev/null @@ -1,85 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601, -) - - -class MnetIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://www.mnet.com/tv/vod/171008', - 'info_dict': { - 'id': '171008', - 'title': 'SS_이해인@히든박스', - 'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55', - 'duration': 88, - 'upload_date': '20151231', - 'timestamp': 1451564040, - 'age_limit': 0, - 'thumbnails': 'mincount:5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'ext': 'flv', - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - 'url': 'http://mnet.interest.me/tv/vod/172790', - 'only_matching': True, - }, { - 'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - # TODO: extract rtmp formats - # no stype -> rtmp url - # stype=H -> m3u8 url - # stype=M -> mpd url - info = self._download_json( - 'http://content.api.mnet.com/player/vodConfig', - video_id, 'Downloading vod config JSON', query={ - 'id': video_id, - 'ctype': 'CLIP', - 'stype': 'H', - })['data']['info'] - - title = info['title'] - - cdn_data = self._download_json( - info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0] - m3u8_url = cdn_data['url'] - token = cdn_data.get('token') - if token and token != '-': - m3u8_url += '?' 
+ token - formats = self._extract_wowza_formats( - m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m']) - - description = info.get('ment') - duration = parse_duration(info.get('time')) - timestamp = parse_iso8601(info.get('date'), delimiter=' ') - age_limit = info.get('adult') - if age_limit is not None: - age_limit = 0 if age_limit == 'N' else 18 - thumbnails = [{ - 'id': thumb_format, - 'url': thumb['url'], - 'width': int_or_none(thumb.get('width')), - 'height': int_or_none(thumb.get('height')), - } for thumb_format, thumb in info.get('cover', {}).items() if thumb.get('url')] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'age_limit': age_limit, - 'thumbnails': thumbnails, - 'formats': formats, - } diff --git a/yt_dlp/extractor/moevideo.py b/yt_dlp/extractor/moevideo.py deleted file mode 100644 index fda08cae9..000000000 --- a/yt_dlp/extractor/moevideo.py +++ /dev/null @@ -1,74 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - clean_html, - int_or_none, -) - - -class MoeVideoIE(InfoExtractor): - IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net' - _VALID_URL = r'''(?x) - https?://(?P<host>(?:www\.)? 
- (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/ - (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)''' - _API_URL = 'http://api.letitbit.net/' - _API_KEY = 'tVL0gjqo5' - _TESTS = [ - { - 'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29', - 'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a', - 'info_dict': { - 'id': '00297.0036103fe3d513ef27915216fd29', - 'ext': 'flv', - 'title': 'Sink cut out machine', - 'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8', - 'thumbnail': r're:^https?://.*\.jpg$', - 'width': 540, - 'height': 360, - 'duration': 179, - 'filesize': 17822500, - }, - 'skip': 'Video has been removed', - }, - { - 'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a', - 'md5': '74f0a014d5b661f0f0e2361300d1620e', - 'info_dict': { - 'id': '77107.7f325710a627383d40540d8e991a', - 'ext': 'flv', - 'title': 'Operacion Condor.', - 'description': 'md5:7e68cb2fcda66833d5081c542491a9a3', - 'thumbnail': r're:^https?://.*\.jpg$', - 'width': 480, - 'height': 296, - 'duration': 6027, - 'filesize': 588257923, - }, - 'skip': 'Video has been removed', - }, - ] - - def _real_extract(self, url): - host, video_id = self._match_valid_url(url).groups() - - webpage = self._download_webpage( - 'http://%s/video/%s' % (host, video_id), - video_id, 'Downloading webpage') - - title = self._og_search_title(webpage) - - embed_webpage = self._download_webpage( - 'http://%s/embed/%s' % (host, video_id), - video_id, 'Downloading embed webpage') - video = self._parse_json(self._search_regex( - r'mvplayer\("#player"\s*,\s*({.+})', - embed_webpage, 'mvplayer'), video_id)['video'] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage), - 'description': clean_html(self._og_search_description(webpage)), - 'duration': int_or_none(self._og_search_property('video:duration', webpage)), - 'url': video['ourUrl'], - } diff --git a/yt_dlp/extractor/mofosex.py 
b/yt_dlp/extractor/mofosex.py deleted file mode 100644 index 9cb6980c1..000000000 --- a/yt_dlp/extractor/mofosex.py +++ /dev/null @@ -1,70 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - int_or_none, - str_to_int, - unified_strdate, -) -from .keezmovies import KeezMoviesIE - - -class MofosexIE(KeezMoviesIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html' - _TESTS = [{ - 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html', - 'md5': '558fcdafbb63a87c019218d6e49daf8a', - 'info_dict': { - 'id': '318131', - 'display_id': 'amateur-teen-playing-and-masturbating-318131', - 'ext': 'mp4', - 'title': 'amateur teen playing and masturbating', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20121114', - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'age_limit': 18, - } - }, { - # This video is no longer available - 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - webpage, info = self._extract_info(url) - - view_count = str_to_int(self._search_regex( - r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False)) - like_count = int_or_none(self._search_regex( - r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage, - 'like count', fatal=False)) - dislike_count = int_or_none(self._search_regex( - r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage, - 'like count', fatal=False)) - upload_date = unified_strdate(self._html_search_regex( - r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False)) - - info.update({ - 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'upload_date': upload_date, - 'thumbnail': self._og_search_thumbnail(webpage), - }) - - return info - - -class MofosexEmbedIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)' - _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)'] - _TESTS = [{ - 'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result( - 'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id), - ie=MofosexIE.ie_key(), video_id=video_id) diff --git a/yt_dlp/extractor/movieclips.py b/yt_dlp/extractor/movieclips.py deleted file mode 100644 index f7f2921fd..000000000 --- a/yt_dlp/extractor/movieclips.py +++ /dev/null @@ -1,47 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - smuggle_url, - float_or_none, - parse_iso8601, - update_url_query, -) - - -class MovieClipsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)' - _TEST = { - 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597', - 'md5': '42b5a0352d4933a7bd54f2104f481244', - 'info_dict': { - 'id': 'pKIGmG83AqD9', - 'ext': 'mp4', - 'title': 'Warcraft Trailer 1', - 'description': 'Watch Trailer 1 from Warcraft (2016). 
Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1446843055, - 'upload_date': '20151106', - 'uploader': 'Movieclips', - }, - 'add_ie': ['ThePlatform'], - 'skip': 'redirects to YouTube', - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video = next(v for v in self._parse_json(self._search_regex( - r'var\s+__REACT_ENGINE__\s*=\s*({.+});', - webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id) - - return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url(update_url_query( - video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}), - 'title': self._og_search_title(webpage), - 'description': self._html_search_meta('description', webpage), - 'duration': float_or_none(video.get('duration')), - 'timestamp': parse_iso8601(video.get('dateCreated')), - 'thumbnail': video.get('defaultImage'), - 'uploader': video.get('provider'), - } diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index f91c53eba..77d1806a3 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -11,6 +11,7 @@ class MSNIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)' _TESTS = [{ 'url': 'https://www.msn.com/en-in/money/video/7-ways-to-get-rid-of-chest-congestion/vi-BBPxU6d', diff --git a/yt_dlp/extractor/mwave.py b/yt_dlp/extractor/mwave.py deleted file mode 100644 index efbfd9d43..000000000 --- a/yt_dlp/extractor/mwave.py +++ /dev/null @@ -1,87 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - parse_duration, -) - - -class MwaveIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' - _URL_TEMPLATE = 
'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s' - _TESTS = [{ - 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', - # md5 is unstable - 'info_dict': { - 'id': '168859', - 'ext': 'flv', - 'title': '[M COUNTDOWN] SISTAR - SHAKE IT', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'M COUNTDOWN', - 'duration': 206, - 'view_count': int, - } - }, { - 'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - vod_info = self._download_json( - 'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id, - video_id, 'Download vod JSON') - - formats = [] - for num, cdn_info in enumerate(vod_info['cdn']): - stream_url = cdn_info.get('url') - if not stream_url: - continue - stream_name = cdn_info.get('name') or compat_str(num) - f4m_stream = self._download_json( - stream_url, video_id, - 'Download %s stream JSON' % stream_name) - f4m_url = f4m_stream.get('fileurl') - if not f4m_url: - continue - formats.extend( - self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name)) - - return { - 'id': video_id, - 'title': vod_info['title'], - 'thumbnail': vod_info.get('cover'), - 'uploader': vod_info.get('program_title'), - 'duration': parse_duration(vod_info.get('time')), - 'view_count': int_or_none(vod_info.get('hit')), - 'formats': formats, - } - - -class MwaveMeetGreetIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://mwave.interest.me/meetgreet/view/256', - 'info_dict': { - 'id': '173294', - 'ext': 'flv', - 'title': '[MEET&GREET] Park BoRam', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Mwave', - 'duration': 3634, - 'view_count': int, - } - }, { - 'url': 'http://mwave.interest.me/en/meetgreet/view/256', - 'only_matching':
True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - clip_id = self._html_search_regex( - r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)', - webpage, 'clip ID') - clip_url = MwaveIE._URL_TEMPLATE % clip_id - return self.url_result(clip_url, 'Mwave', clip_id) diff --git a/yt_dlp/extractor/mychannels.py b/yt_dlp/extractor/mychannels.py deleted file mode 100644 index 8a70c1f7b..000000000 --- a/yt_dlp/extractor/mychannels.py +++ /dev/null @@ -1,35 +0,0 @@ -from .common import InfoExtractor - - -class MyChannelsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mychannels\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://mychannels.com/missholland/miss-holland?production_id=3416', - 'md5': 'b8993daad4262dd68d89d651c0c52c45', - 'info_dict': { - 'id': 'wUUDZZep6vQD', - 'ext': 'mp4', - 'title': 'Miss Holland joins VOTE LEAVE', - 'description': 'Miss Holland | #13 Not a potato', - 'uploader': 'Miss Holland', - } - } - - def _real_extract(self, url): - id_type, url_id = self._match_valid_url(url).groups() - webpage = self._download_webpage(url, url_id) - video_data = self._html_search_regex(r'<div([^>]+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data') - - def extract_data_val(attr, fatal=False): - return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal) - minoto_id = extract_data_val('minoto-id') or self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id') - - return { - '_type': 'url_transparent', - 'url': 'minoto:%s' % minoto_id, - 'id': url_id, - 'title': extract_data_val('title', True), - 'description': extract_data_val('description'), - 'thumbnail': extract_data_val('image'), - 'uploader': extract_data_val('channel'), - } diff --git a/yt_dlp/extractor/myvi.py b/yt_dlp/extractor/myvi.py deleted file mode 100644 index df7200be2..000000000 
--- a/yt_dlp/extractor/myvi.py +++ /dev/null @@ -1,100 +0,0 @@ -from .common import InfoExtractor -from .vimple import SprutoBaseIE - - -class MyviIE(SprutoBaseIE): - _VALID_URL = r'''(?x) - (?: - https?:// - (?:www\.)? - myvi\. - (?: - (?:ru/player|tv)/ - (?: - (?: - embed/html| - flash| - api/Video/Get - )/| - content/preloader\.swf\?.*\bid= - )| - ru/watch/ - )| - myvi: - ) - (?P<id>[\da-zA-Z_-]+) - ''' - _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1'] - _TESTS = [{ - 'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0', - 'md5': '571bbdfba9f9ed229dc6d34cc0f335bf', - 'info_dict': { - 'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43', - 'ext': 'mp4', - 'title': 'хозяин жизни', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 25, - }, - }, { - 'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0', - 'only_matching': True, - }, { - 'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0', - 'only_matching': True, - }, { - 'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0', - 'only_matching': True, - }, { - 'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30', - 'only_matching': True, - }, { - 'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2', - 'only_matching': True, - }, { - 'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - spruto = self._download_json( - 'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData'] - - return self._extract_spruto(spruto, video_id) - - -class MyviEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)' - 
_TESTS = [{ - 'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r', - 'info_dict': { - 'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1', - 'ext': 'mp4', - 'title': 'Твоя (original song).mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 277, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if MyviIE.suitable(url) else super(MyviEmbedIE, cls).suitable(url) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.myvi.tv/embed/%s' % video_id, video_id) - - myvi_id = self._search_regex( - r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)', - webpage, 'video id') - - return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key()) diff --git a/yt_dlp/extractor/newstube.py b/yt_dlp/extractor/newstube.py deleted file mode 100644 index 820eb4ba7..000000000 --- a/yt_dlp/extractor/newstube.py +++ /dev/null @@ -1,75 +0,0 @@ -import base64 -import hashlib - -from .common import InfoExtractor -from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..utils import ( - int_or_none, - parse_codecs, - parse_duration, -) - - -class NewstubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)' - _TEST = { - 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', - 'md5': '9d10320ad473444352f72f746ccb8b8c', - 'info_dict': { - 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', - 'ext': 'mp4', - 'title': 'Телеканал CNN переместил город Славянск в Крым', - 'description': 'md5:419a8c9f03442bc0b0a794d689360335', - 'duration': 31.05, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - page = self._download_webpage(url, video_id) - title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True) - - video_guid = self._html_search_regex( - 
r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', - page, 'video GUID') - - enc_data = base64.b64decode(self._download_webpage( - 'https://www.newstube.ru/embed/api/player/getsources2', - video_guid, query={ - 'guid': video_guid, - 'ff': 3, - })) - key = hashlib.pbkdf2_hmac( - 'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16] - dec_data = unpad_pkcs7(aes_cbc_decrypt_bytes(enc_data[32:], key, enc_data[16:32])) - sources = self._parse_json(dec_data, video_guid) - - formats = [] - for source in sources: - source_url = source.get('Src') - if not source_url: - continue - height = int_or_none(source.get('Height')) - f = { - 'format_id': 'http' + ('-%dp' % height if height else ''), - 'url': source_url, - 'width': int_or_none(source.get('Width')), - 'height': height, - } - source_type = source.get('Type') - if source_type: - f.update(parse_codecs(self._search_regex( - r'codecs="([^"]+)"', source_type, 'codecs', fatal=False))) - formats.append(f) - - self._check_formats(formats, video_guid) - - return { - 'id': video_guid, - 'title': title, - 'description': self._html_search_meta(['description', 'og:description'], page), - 'thumbnail': self._html_search_meta(['og:image:secure_url', 'og:image', 'twitter:image'], page), - 'duration': parse_duration(self._html_search_meta('duration', page)), - 'formats': formats, - } diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py index de22cb8d6..165d8ce9d 100644 --- a/yt_dlp/extractor/nick.py +++ b/yt_dlp/extractor/nick.py @@ -188,26 +188,6 @@ def _get_feed_url(self, uri, url=None): return self._remove_template_parameter(config['feedWithQueryParams']) -class NickNightIE(NickDeIE): # XXX: Do not subclass from concrete IE - IE_NAME = 'nicknight' - _VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TESTS = [{ - 
'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde', - 'only_matching': True, - }, { - 'url': 'http://www.nicknight.at/shows/977-awkward', - 'only_matching': True, - }, { - 'url': 'http://www.nicknight.at/shows/1900-faking-it', - 'only_matching': True, - }] - - def _extract_mrss_url(self, webpage, *args): - return self._search_regex( - r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, - 'mrss url', group='url') - - class NickRuIE(MTVServicesInfoExtractor): IE_NAME = 'nickelodeonru' _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)' diff --git a/yt_dlp/extractor/normalboots.py b/yt_dlp/extractor/normalboots.py deleted file mode 100644 index 07babcd2c..000000000 --- a/yt_dlp/extractor/normalboots.py +++ /dev/null @@ -1,51 +0,0 @@ -from .common import InfoExtractor -from .jwplatform import JWPlatformIE - -from ..utils import ( - unified_strdate, -) - - -class NormalbootsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' - _TEST = { - 'url': 'http://normalboots.com/video/home-alone-games-jontron/', - 'info_dict': { - 'id': 'home-alone-games-jontron', - 'ext': 'mp4', - 'title': 'Home Alone Games - JonTron - NormalBoots', - 'description': 'Jon is late for Christmas. Typical. 
Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/', - 'uploader': 'JonTron', - 'upload_date': '20140125', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['JWPlatform'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_uploader = self._html_search_regex( - r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', - webpage, 'uploader', fatal=False) - video_upload_date = unified_strdate(self._html_search_regex( - r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', - webpage, 'date', fatal=False)) - - jwplatform_url = JWPlatformIE._extract_url(webpage) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': jwplatform_url, - 'ie_key': JWPlatformIE.ie_key(), - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'uploader': video_uploader, - 'upload_date': video_upload_date, - } diff --git a/yt_dlp/extractor/nosvideo.py b/yt_dlp/extractor/nosvideo.py deleted file mode 100644 index 7e9688c0b..000000000 --- a/yt_dlp/extractor/nosvideo.py +++ /dev/null @@ -1,72 +0,0 @@ -import re - -from .common import InfoExtractor -from ..networking import Request -from ..utils import ( - ExtractorError, - urlencode_postdata, - xpath_text, - xpath_with_ns, -) - -_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'}) - - -class NosVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \ - r'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?' 
- _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml' - _FILE_DELETED_REGEX = r'<b>File Not Found</b>' - _TEST = { - 'url': 'http://nosvideo.com/?v=mu8fle7g7rpq', - 'md5': '6124ed47130d8be3eacae635b071e6b6', - 'info_dict': { - 'id': 'mu8fle7g7rpq', - 'ext': 'mp4', - 'title': 'big_buck_bunny_480p_surround-fix.avi.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - fields = { - 'id': video_id, - 'op': 'download1', - 'method_free': 'Continue to Video', - } - req = Request(url, urlencode_postdata(fields)) - req.headers['Content-type'] = 'application/x-www-form-urlencoded' - webpage = self._download_webpage(req, video_id, - 'Downloading download page') - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID') - playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id) - playlist = self._download_xml(playlist_url, video_id) - - track = playlist.find(_x('.//xspf:track')) - if track is None: - raise ExtractorError( - 'XML playlist is missing the \'track\' element', - expected=True) - title = xpath_text(track, _x('./xspf:title'), 'title') - url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True) - thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail') - if title is not None: - title = title.strip() - - formats = [{ - 'format_id': 'sd', - 'url': url, - }] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } diff --git a/yt_dlp/extractor/nrl.py b/yt_dlp/extractor/nrl.py index 798d03417..1e8cf0b75 100644 --- a/yt_dlp/extractor/nrl.py +++ b/yt_dlp/extractor/nrl.py @@ -2,6 +2,7 @@ class NRLTVIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)' _TEST = { 'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/', 
diff --git a/yt_dlp/extractor/ooyala.py b/yt_dlp/extractor/ooyala.py deleted file mode 100644 index 65afccdb1..000000000 --- a/yt_dlp/extractor/ooyala.py +++ /dev/null @@ -1,230 +0,0 @@ -import base64 -import re - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_str, -) -from ..utils import ( - determine_ext, - float_or_none, - int_or_none, - smuggle_url, - try_get, - unsmuggle_url, -) - - -class OoyalaBaseIE(InfoExtractor): - _PLAYER_BASE = 'http://player.ooyala.com/' - _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' - _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s' - - def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None): - content_tree = self._download_json(content_tree_url, video_id)['content_tree'] - metadata = content_tree[list(content_tree)[0]] - embed_code = metadata['embed_code'] - pcode = metadata.get('asset_pcode') or embed_code - title = metadata['title'] - - auth_data = self._download_json( - self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code), - video_id, headers=self.geo_verification_headers(), query={ - 'domain': domain or 'player.ooyala.com', - 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', - 'embedToken': embed_token, - })['authorization_data'][embed_code] - - urls = [] - formats = [] - streams = auth_data.get('streams') or [{ - 'delivery_type': 'hls', - 'url': { - 'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(), - } - }] - for stream in streams: - url_data = try_get(stream, lambda x: x['url']['data'], compat_str) - if not url_data: - continue - s_url = compat_b64decode(url_data).decode('utf-8') - if not s_url or s_url in urls: - continue - urls.append(s_url) - ext = determine_ext(s_url, None) - delivery_type = stream.get('delivery_type') - if delivery_type == 'hls' or ext == 'm3u8': - 
formats.extend(self._extract_m3u8_formats( - re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or ext == 'f4m': - formats.extend(self._extract_f4m_formats( - s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif delivery_type == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - s_url, embed_code, mpd_id='dash', fatal=False)) - elif delivery_type == 'smooth': - self._extract_ism_formats( - s_url, embed_code, ism_id='mss', fatal=False) - elif ext == 'smil': - formats.extend(self._extract_smil_formats( - s_url, embed_code, fatal=False)) - else: - formats.append({ - 'url': s_url, - 'ext': ext or delivery_type, - 'vcodec': stream.get('video_codec'), - 'format_id': delivery_type, - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - 'fps': float_or_none(stream.get('framerate')), - }) - if not formats and not auth_data.get('authorized'): - self.raise_no_formats('%s said: %s' % ( - self.IE_NAME, auth_data['message']), expected=True) - - subtitles = {} - for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items(): - sub_url = sub.get('url') - if not sub_url: - continue - subtitles[lang] = [{ - 'url': sub_url, - }] - - return { - 'id': embed_code, - 'title': title, - 'description': metadata.get('description'), - 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), - 'duration': float_or_none(metadata.get('duration'), 1000), - 'subtitles': subtitles, - 'formats': formats, - } - - -class OoyalaIE(OoyalaBaseIE): - _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)' - - _TESTS = [ - { - # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video - 'url': 
'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', - 'info_dict': { - 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', - 'ext': 'mp4', - 'title': 'Explaining Data Recovery from Hard Drives and SSDs', - 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', - 'duration': 853.386, - }, - # The video in the original webpage now uses PlayWire - 'skip': 'Ooyala said: movie expired', - }, { - # Only available for ipad - 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', - 'info_dict': { - 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', - 'ext': 'mp4', - 'title': 'Simulation Overview - Levels of Simulation', - 'duration': 194.948, - }, - }, - { - # Information available only through SAS api - # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187 - 'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx', - 'md5': 'a84001441b35ea492bc03736e59e7935', - 'info_dict': { - 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx', - 'ext': 'mp4', - 'title': 'Divide Tool Path.mp4', - 'duration': 204.405, - } - }, - { - # empty stream['url']['data'] - 'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is', - 'only_matching': True, - } - ] - - def _extract_from_webpage(self, url, webpage): - mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) - or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) - or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) - or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) - or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage)) - if mobj is not None: - embed_token = self._search_regex( - r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)', - 
webpage, 'ooyala embed token', default=None) - yield self._build_url_result(smuggle_url( - mobj.group('ec'), { - 'domain': url, - 'embed_token': embed_token, - })) - return - - # Look for multiple Ooyala embeds on SBN network websites - mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) - if mobj is not None: - for v in self._parse_json(mobj.group(1), self._generic_id(url), fatal=False) or []: - yield self._build_url_result(smuggle_url(v['provider_video_id'], {'domain': url})) - - @staticmethod - def _url_for_embed_code(embed_code): - return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code - - @classmethod - def _build_url_result(cls, embed_code): - return cls.url_result(cls._url_for_embed_code(embed_code), - ie=cls.ie_key()) - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - embed_code = self._match_id(url) - domain = smuggled_data.get('domain') - supportedformats = smuggled_data.get('supportedformats') - embed_token = smuggled_data.get('embed_token') - content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token) - - -class OoyalaExternalIE(OoyalaBaseIE): - _VALID_URL = r'''(?x) - (?: - ooyalaexternal:| - https?://.+?\.ooyala\.com/.*?\bexternalId= - ) - (?P<partner_id>[^:]+) - : - (?P<id>.+) - (?: - :| - .*?&pcode= - ) - (?P<pcode>.+?) 
- (?:&|$) - ''' - - _TEST = { - 'url': 'https://player.ooyala.com/player.js?externalId=espn:10365079&pcode=1kNG061cgaoolOncv54OAO1ceO-I&adSetCode=91cDU6NuXTGKz3OdjOxFdAgJVtQcKJnI&callback=handleEvents&hasModuleParams=1&height=968&playerBrandingId=7af3bd04449c444c964f347f11873075&targetReplaceId=videoPlayer&width=1656&wmode=opaque&allowScriptAccess=always', - 'info_dict': { - 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', - 'ext': 'mp4', - 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', - 'duration': 1302.0, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - partner_id, video_id, pcode = self._match_valid_url(url).groups() - content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id) - return self._extract(content_tree_url, video_id) diff --git a/yt_dlp/extractor/pandoratv.py b/yt_dlp/extractor/pandoratv.py deleted file mode 100644 index ccc78da57..000000000 --- a/yt_dlp/extractor/pandoratv.py +++ /dev/null @@ -1,128 +0,0 @@ -from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - ExtractorError, - float_or_none, - parse_duration, - parse_qs, - str_to_int, - urlencode_postdata, -) - - -class PandoraTVIE(InfoExtractor): - IE_NAME = 'pandora.tv' - IE_DESC = '판도라TV' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:www\.)?pandora\.tv/view/(?P<user_id>[^/]+)/(?P<id>\d+)| # new format - (?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?| # old format - m\.pandora\.tv/?\? 
# mobile - ) - ''' - _TESTS = [{ - 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', - 'info_dict': { - 'id': '53294230', - 'ext': 'flv', - 'title': '頭を撫でてくれる?', - 'description': '頭を撫でてくれる?', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 39, - 'upload_date': '20151218', - 'uploader': 'カワイイ動物まとめ', - 'uploader_id': 'mikakim', - 'view_count': int, - 'like_count': int, - } - }, { - 'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744', - 'info_dict': { - 'id': '54721744', - 'ext': 'flv', - 'title': '[HD] JAPAN COUNTDOWN 170423', - 'description': '[HD] JAPAN COUNTDOWN 170423', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1704.9, - 'upload_date': '20170423', - 'uploader': 'GOGO_UCC', - 'uploader_id': 'gogoucc', - 'view_count': int, - 'like_count': int, - }, - 'params': { - # Test metadata only - 'skip_download': True, - }, - }, { - 'url': 'http://www.pandora.tv/view/mikakim/53294230#36797454_new', - 'only_matching': True, - }, { - 'url': 'http://m.pandora.tv/?c=view&ch_userid=mikakim&prgid=54600346', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - user_id = mobj.group('user_id') - video_id = mobj.group('id') - - if not user_id or not video_id: - qs = parse_qs(url) - video_id = qs.get('prgid', [None])[0] - user_id = qs.get('ch_userid', [None])[0] - if any(not f for f in (video_id, user_id,)): - raise ExtractorError('Invalid URL', expected=True) - - data = self._download_json( - 'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s' - % (user_id, video_id), video_id) - - info = data['data']['rows']['vod_play_info']['result'] - - formats = [] - for format_id, format_url in info.items(): - if not format_url: - continue - height = self._search_regex( - r'^v(\d+)[Uu]rl$', format_id, 'height', default=None) - if not height: - continue - - play_url = self._download_json( - 
'http://m.pandora.tv/?c=api&m=play_url', video_id, - data=urlencode_postdata({ - 'prgid': video_id, - 'runtime': info.get('runtime'), - 'vod_url': format_url, - }), - headers={ - 'Origin': url, - 'Content-Type': 'application/x-www-form-urlencoded', - }) - format_url = play_url.get('url') - if not format_url: - continue - - formats.append({ - 'format_id': '%sp' % height, - 'url': format_url, - 'height': int(height), - }) - - return { - 'id': video_id, - 'title': info['subject'], - 'description': info.get('body'), - 'thumbnail': info.get('thumbnail') or info.get('poster'), - 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')), - 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None, - 'uploader': info.get('nickname'), - 'uploader_id': info.get('upload_userid'), - 'view_count': str_to_int(info.get('hit')), - 'like_count': str_to_int(info.get('likecnt')), - 'formats': formats, - } diff --git a/yt_dlp/extractor/people.py b/yt_dlp/extractor/people.py deleted file mode 100644 index c5143c3ed..000000000 --- a/yt_dlp/extractor/people.py +++ /dev/null @@ -1,29 +0,0 @@ -from .common import InfoExtractor - - -class PeopleIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html' - - _TEST = { - 'url': 'http://www.people.com/people/videos/0,,20995451,00.html', - 'info_dict': { - 'id': 'ref:20995451', - 'ext': 'mp4', - 'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”', - 'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 246.318, - 'timestamp': 1458720585, - 'upload_date': '20160323', - 'uploader_id': '416418724', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['BrightcoveNew'], - } - - def _real_extract(self, url): - return self.url_result( - 
'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s' - % self._match_id(url), 'BrightcoveNew') diff --git a/yt_dlp/extractor/playfm.py b/yt_dlp/extractor/playfm.py deleted file mode 100644 index e895ba480..000000000 --- a/yt_dlp/extractor/playfm.py +++ /dev/null @@ -1,70 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - int_or_none, - parse_iso8601, -) - - -class PlayFMIE(InfoExtractor): - IE_NAME = 'play.fm' - _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])' - - _TEST = { - 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12', - 'md5': 'c505f8307825a245d0c7ad1850001f22', - 'info_dict': { - 'id': '71276', - 'ext': 'mp3', - 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', - 'description': '', - 'duration': 5627, - 'timestamp': 1406033781, - 'upload_date': '20140722', - 'uploader': 'Dan Drastic', - 'uploader_id': '71170', - 'view_count': int, - 'comment_count': int, - }, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - slug = mobj.group('slug') - - recordings = self._download_json( - 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id) - - error = recordings.get('error') - if isinstance(error, dict): - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error.get('message')), - expected=True) - - audio_url = recordings['audio'] - video_id = compat_str(recordings.get('id') or video_id) - title = recordings['title'] - description = recordings.get('description') - duration = int_or_none(recordings.get('recordingDuration')) - timestamp = parse_iso8601(recordings.get('created_at')) - uploader = recordings.get('page', {}).get('title') - uploader_id = compat_str(recordings.get('page', {}).get('id')) - view_count = int_or_none(recordings.get('playCount')) - comment_count 
= int_or_none(recordings.get('commentCount')) - categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')] - - return { - 'id': video_id, - 'url': audio_url, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'view_count': view_count, - 'comment_count': comment_count, - 'categories': categories, - } diff --git a/yt_dlp/extractor/plays.py b/yt_dlp/extractor/plays.py deleted file mode 100644 index 9371f7b23..000000000 --- a/yt_dlp/extractor/plays.py +++ /dev/null @@ -1,49 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class PlaysTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})' - _TESTS = [{ - 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', - 'md5': 'dfeac1198506652b5257a62762cec7bc', - 'info_dict': { - 'id': '56af17f56c95335490', - 'ext': 'mp4', - 'title': 'Bjergsen - When you outplay the Azir wall', - 'description': 'Posted by Bjergsen', - } - }, { - 'url': 'https://plays.tv/embeds/56af17f56c95335490', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'https://plays.tv/video/%s' % video_id, video_id) - - info = self._search_json_ld(webpage, video_id,) - - mpd_url, sources = re.search( - r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', - webpage).groups() - formats = self._extract_mpd_formats( - self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') - for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): - formats.append({ - 'url': self._proto_relative_url(format_url), - 'format_id': 'http-' + format_id, - 'height': int_or_none(height), - }) - - info.update({ - 'id': video_id, - 'description': self._og_search_description(webpage), - 'thumbnail': 
info.get('thumbnail') or self._og_search_thumbnail(webpage), - 'formats': formats, - }) - - return info diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py deleted file mode 100644 index 1e0989d0a..000000000 --- a/yt_dlp/extractor/playvid.py +++ /dev/null @@ -1,90 +0,0 @@ -import re -import urllib.parse - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote -from ..utils import ExtractorError, clean_html - - -class PlayvidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' - _TESTS = [{ - 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', - 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', - 'info_dict': { - 'id': 'RnmBNgtrrJu', - 'ext': 'mp4', - 'title': 'md5:9256d01c6317e3f703848b5906880dc8', - 'duration': 82, - 'age_limit': 18, - }, - 'skip': 'Video removed due to ToS', - }, { - 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH', - 'md5': '39d49df503ad7b8f23a4432cbf046477', - 'info_dict': { - 'id': 'hwb0GpNkzgH', - 'ext': 'mp4', - 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', - 'age_limit': 18, - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - m_error = re.search( - r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage) - if m_error: - raise ExtractorError(clean_html(m_error.group('msg')), expected=True) - - video_title = None - duration = None - video_thumbnail = None - formats = [] - - # most of the information is stored in the flashvars - flashvars = self._html_search_regex( - r'flashvars="(.+?)"', webpage, 'flashvars') - - infos = compat_urllib_parse_unquote(flashvars).split(r'&') - for info in infos: - videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) - if videovars_match: - key = videovars_match.group(1) - val = videovars_match.group(2) - - if key == 
'title': - video_title = urllib.parse.unquote_plus(val) - if key == 'duration': - try: - duration = int(val) - except ValueError: - pass - if key == 'big_thumb': - video_thumbnail = val - - videourl_match = re.match( - r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) - if videourl_match: - height = int(videourl_match.group('resolution')) - formats.append({ - 'height': height, - 'url': val, - }) - - # Extract title - should be in the flashvars; if not, look elsewhere - if video_title is None: - video_title = self._html_extract_title(webpage) - - return { - 'id': video_id, - 'formats': formats, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'duration': duration, - 'description': None, - 'age_limit': 18 - } diff --git a/yt_dlp/extractor/porncom.py b/yt_dlp/extractor/porncom.py deleted file mode 100644 index c8ef240d7..000000000 --- a/yt_dlp/extractor/porncom.py +++ /dev/null @@ -1,99 +0,0 @@ -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - js_to_json, - parse_filesize, - str_to_int, -) - - -class PornComIE(InfoExtractor): - _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339', - 'md5': '3f30ce76267533cd12ba999263156de7', - 'info_dict': { - 'id': '2603339', - 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec', - 'ext': 'mp4', - 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 551, - 'view_count': int, - 'age_limit': 18, - 'categories': list, - 'tags': list, - }, - }, { - 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = 
mobj.group('display_id') or video_id - - webpage = self._download_webpage(url, display_id) - - config = self._parse_json( - self._search_regex( - (r'=\s*({.+?})\s*;\s*v1ar\b', - r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='), - webpage, 'config', default='{}'), - display_id, transform_source=js_to_json, fatal=False) - - if config: - title = config['title'] - formats = [{ - 'url': stream['url'], - 'format_id': stream.get('id'), - 'height': int_or_none(self._search_regex( - r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None)) - } for stream in config['streams'] if stream.get('url')] - thumbnail = (compat_urlparse.urljoin( - config['thumbCDN'], config['poster']) - if config.get('thumbCDN') and config.get('poster') else None) - duration = int_or_none(config.get('length')) - else: - title = self._search_regex( - (r'<title>([^<]+)</title>', r'<h1[^>]*>([^<]+)</h1>'), - webpage, 'title') - formats = [{ - 'url': compat_urlparse.urljoin(url, format_url), - 'format_id': '%sp' % height, - 'height': int(height), - 'filesize_approx': parse_filesize(filesize), - } for format_url, height, filesize in re.findall( - r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<', - webpage)] - thumbnail = None - duration = None - - view_count = str_to_int(self._search_regex( - (r'Views:\s*</span>\s*<span>\s*([\d,.]+)', - r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage, - 'view count', fatal=False)) - - def extract_list(kind): - s = self._search_regex( - (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(), - r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()), - webpage, kind, fatal=False) - return re.findall(r'<a[^>]+>([^<]+)</a>', s or '') - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'formats': formats, - 'age_limit': 18, - 'categories': extract_list('categories'), - 'tags': extract_list('tags'), - } diff --git a/yt_dlp/extractor/pornez.py 
b/yt_dlp/extractor/pornez.py deleted file mode 100644 index bc45f865e..000000000 --- a/yt_dlp/extractor/pornez.py +++ /dev/null @@ -1,60 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - clean_html, - int_or_none, - get_element_by_class, - urljoin, -) - - -class PornezIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornez\.net/(?:video(?P<id>\w+)|watch)/' - _TESTS = [{ - 'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/', - 'info_dict': { - 'id': '344819', - 'ext': 'mp4', - 'title': 'mistresst funny_penis_names wmv', - 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://pornez.net/watch/leana+lovings+stiff+for+stepdaughter/', - 'info_dict': { - 'id': '156161', - 'ext': 'mp4', - 'title': 'Watch leana lovings stiff for stepdaughter porn video.', - 'age_limit': 18, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://pornez.net/videovzs27fj/tutor4k-e14-blue-wave-1080p-nbq-tutor4k-e14-blue-wave/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - if not video_id: - video_id = self._search_regex( - r'<link[^>]+\bhref=["\']https?://pornez.net/\?p=(\w+)["\']', webpage, 'id') - - iframe_src = self._html_search_regex(r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe') - iframe = self._download_webpage(urljoin('https://pornez.net', iframe_src), video_id) - - entries = self._parse_html5_media_entries(iframe_src, iframe, video_id)[0] - for fmt in entries['formats']: - height = self._search_regex(r'_(\d+)\.m3u8', fmt['url'], 'height') - fmt['format_id'] = '%sp' % height - fmt['height'] = int_or_none(height) - - entries.update({ - 'id': video_id, - 'title': (clean_html(get_element_by_class('video-title', webpage)) - or self._html_search_meta( - ['twitter:title', 'og:title', 'description'], webpage, 'title', default=None)), - 'thumbnail': 
self._html_search_meta(['thumbnailUrl'], webpage, 'thumb', default=None), - 'age_limit': 18, - }) - return entries diff --git a/yt_dlp/extractor/pornhd.py b/yt_dlp/extractor/pornhd.py deleted file mode 100644 index c8a1ec80b..000000000 --- a/yt_dlp/extractor/pornhd.py +++ /dev/null @@ -1,116 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - js_to_json, - merge_dicts, - urljoin, -) - - -class PornHdIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' - _TESTS = [{ - 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', - 'md5': '87f1540746c1d32ec7a2305c12b96b25', - 'info_dict': { - 'id': '9864', - 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', - 'ext': 'mp4', - 'title': 'Restroom selfie masturbation', - 'description': 'md5:3748420395e03e31ac96857a8f125b2b', - 'thumbnail': r're:^https?://.*\.jpg', - 'view_count': int, - 'like_count': int, - 'age_limit': 18, - }, - 'skip': 'HTTP Error 404: Not Found', - }, { - 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', - 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de', - 'info_dict': { - 'id': '1962', - 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video', - 'ext': 'mp4', - 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759', - 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', - 'thumbnail': r're:^https?://.*\.jpg', - 'view_count': int, - 'like_count': int, - 'age_limit': 18, - }, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id or video_id) - - title = self._html_search_regex( - [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)', - r'<title>(.+?) 
- .*?[Pp]ornHD.*?</title>'], webpage, 'title') - - sources = self._parse_json(js_to_json(self._search_regex( - r"(?s)sources'?\s*[:=]\s*(\{.+?\})", - webpage, 'sources', default='{}')), video_id) - - info = {} - if not sources: - entries = self._parse_html5_media_entries(url, webpage, video_id) - if entries: - info = entries[0] - - if not sources and not info: - message = self._html_search_regex( - r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1', - webpage, 'error message', group='value') - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) - - formats = [] - for format_id, video_url in sources.items(): - video_url = urljoin(url, video_url) - if not video_url: - continue - height = int_or_none(self._search_regex( - r'^(\d+)[pP]', format_id, 'height', default=None)) - formats.append({ - 'url': video_url, - 'ext': determine_ext(video_url, 'mp4'), - 'format_id': format_id, - 'height': height, - }) - if formats: - info['formats'] = formats - - description = self._html_search_regex( - (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>', - r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'), - webpage, 'description', fatal=False, - group='value') or self._html_search_meta( - 'description', webpage, default=None) or self._og_search_description(webpage) - view_count = int_or_none(self._html_search_regex( - r'(\d+) views\s*<', webpage, 'view count', fatal=False)) - thumbnail = self._search_regex( - r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, - 'thumbnail', default=None, group='url') - - like_count = int_or_none(self._search_regex( - (r'(\d+)</span>\s*likes', - r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes', - r'class=["\']save-count["\'][^>]*>\s*(\d+)'), - webpage, 'like count', fatal=False)) - - return merge_dicts(info, { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'view_count': view_count, - 'like_count': 
like_count, - 'formats': formats, - 'age_limit': 18, - }) diff --git a/yt_dlp/extractor/radiobremen.py b/yt_dlp/extractor/radiobremen.py deleted file mode 100644 index 99ba050d0..000000000 --- a/yt_dlp/extractor/radiobremen.py +++ /dev/null @@ -1,59 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import parse_duration - - -class RadioBremenIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)' - IE_NAME = 'radiobremen' - - _TEST = { - 'url': 'http://www.radiobremen.de/mediathek/?id=141876', - 'info_dict': { - 'id': '141876', - 'ext': 'mp4', - 'duration': 178, - 'width': 512, - 'title': 'Druck auf Patrick Öztürk', - 'thumbnail': r're:https?://.*\.jpg$', - 'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id - meta_doc = self._download_webpage( - meta_url, video_id, 'Downloading metadata') - title = self._html_search_regex( - r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title') - description = self._html_search_regex( - r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False) - duration = parse_duration(self._html_search_regex( - r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>', - meta_doc, 'duration', fatal=False)) - - page_doc = self._download_webpage( - url, video_id, 'Downloading video information') - mobj = re.search( - r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", - page_doc) - video_url = ( - "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % - (video_id, video_id, mobj.group("secret"), 
mobj.group('width'))) - - formats = [{ - 'url': video_url, - 'ext': 'mp4', - 'width': int(mobj.group('width')), - }] - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'formats': formats, - 'thumbnail': mobj.group('thumbnail'), - } diff --git a/yt_dlp/extractor/recurbate.py b/yt_dlp/extractor/recurbate.py deleted file mode 100644 index d7294cb14..000000000 --- a/yt_dlp/extractor/recurbate.py +++ /dev/null @@ -1,42 +0,0 @@ -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ExtractorError, merge_dicts - - -class RecurbateIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?recurbate\.com/play\.php\?video=(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://recurbate.com/play.php?video=39161415', - 'md5': 'dd2b4ec57aa3e3572cb5cf0997fca99f', - 'info_dict': { - 'id': '39161415', - 'ext': 'mp4', - 'description': 'md5:db48d09e4d93fc715f47fd3d6b7edd51', - 'title': 'Performer zsnicole33 show on 2022-10-25 20:23, Chaturbate Archive – Recurbate', - 'age_limit': 18, - }, - 'skip': 'Website require membership.', - }] - - def _real_extract(self, url): - SUBSCRIPTION_MISSING_MESSAGE = 'This video is only available for registered users; Set your authenticated browser user agent via the --user-agent parameter.' 
- video_id = self._match_id(url) - try: - webpage = self._download_webpage(url, video_id) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 403: - self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies') - raise - token = self._html_search_regex(r'data-token="([^"]+)"', webpage, 'token') - video_url = f'https://recurbate.com/api/get.php?video={video_id}&token={token}' - - video_webpage = self._download_webpage(video_url, video_id) - if video_webpage == 'shall_subscribe': - self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies') - entries = self._parse_html5_media_entries(video_url, video_webpage, video_id) - return merge_dicts({ - 'id': video_id, - 'title': self._html_extract_title(webpage, 'title'), - 'description': self._og_search_description(webpage), - 'age_limit': self._rta_search(webpage), - }, entries[0]) diff --git a/yt_dlp/extractor/rice.py b/yt_dlp/extractor/rice.py deleted file mode 100644 index 3dd4d31de..000000000 --- a/yt_dlp/extractor/rice.py +++ /dev/null @@ -1,112 +0,0 @@ -import re - -from .common import InfoExtractor -from ..compat import compat_parse_qs -from ..utils import ( - xpath_text, - xpath_element, - int_or_none, - parse_iso8601, - ExtractorError, -) - - -class RICEIE(InfoExtractor): - _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)' - _TEST = { - 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw', - 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a', - 'info_dict': { - 'id': 'YEWIvbhb40aqdjMD1ALSqw', - 'ext': 'mp4', - 'title': 'Active Learning in Archeology', - 'upload_date': '20140616', - 'timestamp': 1402926346, - } - } - _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config' - - def _real_extract(self, url): - qs = 
compat_parse_qs(self._match_valid_url(url).group('query')) - if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'): - raise ExtractorError('Invalid URL', expected=True) - - portal_id = qs['PortalID'][0] - playlist_id = qs['DestinationID'][0] - content_id = qs['ContentID'][0] - - content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={ - 'portalId': portal_id, - 'playlistId': playlist_id, - 'contentId': content_id - }) - metadata = xpath_element(content_data, './/metaData', fatal=True) - title = xpath_text(metadata, 'primaryTitle', fatal=True) - encodings = xpath_element(content_data, './/encodings', fatal=True) - player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={ - 'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True), - 'contentId': content_id, - }) - - common_fmt = {} - dimensions = xpath_text(encodings, 'dimensions') - if dimensions: - wh = dimensions.split('x') - if len(wh) == 2: - common_fmt.update({ - 'width': int_or_none(wh[0]), - 'height': int_or_none(wh[1]), - }) - - formats = [] - rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS)) - if rtsp_path: - fmt = { - 'url': rtsp_path, - 'format_id': 'rtsp', - } - fmt.update(common_fmt) - formats.append(fmt) - for source in player_data.findall(self._xpath_ns('.//Source', self._NS)): - video_url = xpath_text(source, self._xpath_ns('File', self._NS)) - if not video_url: - continue - if '.m3u8' in video_url: - formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - fmt = { - 'url': video_url, - 'format_id': video_url.split(':')[0], - } - fmt.update(common_fmt) - rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) - if rtmp: - fmt.update({ - 'url': rtmp.group('url'), - 'play_path': rtmp.group('playpath'), - 'app': rtmp.group('app'), - 
'ext': 'flv', - }) - formats.append(fmt) - - thumbnails = [] - for content_asset in content_data.findall('.//contentAssets'): - asset_type = xpath_text(content_asset, 'type') - if asset_type == 'image': - image_url = xpath_text(content_asset, 'httpPath') - if not image_url: - continue - thumbnails.append({ - 'id': xpath_text(content_asset, 'ID'), - 'url': image_url, - }) - - return { - 'id': content_id, - 'title': title, - 'description': xpath_text(metadata, 'abstract'), - 'duration': int_or_none(xpath_text(metadata, 'duration')), - 'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')), - 'thumbnails': thumbnails, - 'formats': formats, - } diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py index 056cf87d2..07e1aa3ce 100644 --- a/yt_dlp/extractor/rtl2.py +++ b/yt_dlp/extractor/rtl2.py @@ -1,16 +1,7 @@ import re from .common import InfoExtractor -from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import ( - compat_b64decode, - compat_str, -) -from ..utils import ( - ExtractorError, - int_or_none, - strip_or_none, -) +from ..utils import int_or_none class RTL2IE(InfoExtractor): @@ -102,92 +93,3 @@ def _real_extract(self, url): 'duration': int_or_none(video_info.get('duration')), 'formats': formats, } - - -class RTL2YouBaseIE(InfoExtractor): - _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/' - - -class RTL2YouIE(RTL2YouBaseIE): - IE_NAME = 'rtl2:you' - _VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du', - 'info_dict': { - 'id': '15740', - 'ext': 'mp4', - 'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!', - 'description': 'md5:ddaa95c61b372b12b66e115b2772fe01', - 'age_limit': 12, - }, - }, { - 'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712', - 'only_matching': True, - }] - _AES_KEY = 
b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!' - _GEO_COUNTRIES = ['DE'] - - def _real_extract(self, url): - video_id = self._match_id(url) - - stream_data = self._download_json( - self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id) - - data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':') - stream_url = unpad_pkcs7(aes_cbc_decrypt_bytes( - compat_b64decode(data), self._AES_KEY, compat_b64decode(iv))) - if b'rtl2_you_video_not_found' in stream_url: - raise ExtractorError('video not found', expected=True) - - formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native') - - video_data = self._download_json( - self._BACKWERK_BASE_URL + 'video/' + video_id, video_id) - - series = video_data.get('formatTitle') - title = episode = video_data.get('title') or series - if series and series != title: - title = '%s - %s' % (series, title) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': strip_or_none(video_data.get('description')), - 'thumbnail': video_data.get('image'), - 'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000), - 'series': series, - 'episode': episode, - 'age_limit': int_or_none(video_data.get('minimumAge')), - } - - -class RTL2YouSeriesIE(RTL2YouBaseIE): - IE_NAME = 'rtl2:you:series' - _VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)' - _TEST = { - 'url': 'http://you.rtl2.de/videos/115/dragon-ball', - 'info_dict': { - 'id': '115', - }, - 'playlist_mincount': 5, - } - - def _real_extract(self, url): - series_id = self._match_id(url) - stream_data = self._download_json( - self._BACKWERK_BASE_URL + 'videos', - series_id, query={ - 'formatId': series_id, - 'limit': 1000000000, - }) - - entries = [] - for video in stream_data.get('videos', []): - video_id = compat_str(video['videoId']) - if not video_id: - continue - entries.append(self.url_result( - 
'http://you.rtl2.de/video/%s/%s' % (series_id, video_id), - 'RTL2You', video_id)) - return self.playlist_result(entries, series_id) diff --git a/yt_dlp/extractor/rtvnh.py b/yt_dlp/extractor/rtvnh.py deleted file mode 100644 index 7c6174494..000000000 --- a/yt_dlp/extractor/rtvnh.py +++ /dev/null @@ -1,58 +0,0 @@ -from .common import InfoExtractor -from ..utils import ExtractorError - - -class RTVNHIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.rtvnh.nl/video/131946', - 'md5': 'cdbec9f44550763c8afc96050fa747dc', - 'info_dict': { - 'id': '131946', - 'ext': 'mp4', - 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw', - 'thumbnail': r're:^https?:.*\.jpg$' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - meta = self._parse_json(self._download_webpage( - 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) - - status = meta.get('status') - if status != 200: - raise ExtractorError( - '%s returned error code %d' % (self.IE_NAME, status), expected=True) - - formats = [] - rtmp_formats = self._extract_smil_formats( - 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id) - formats.extend(rtmp_formats) - - for rtmp_format in rtmp_formats: - rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) - rtsp_format = rtmp_format.copy() - del rtsp_format['play_path'] - del rtsp_format['ext'] - rtsp_format.update({ - 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), - 'url': rtmp_url.replace('rtmp://', 'rtsp://'), - 'protocol': 'rtsp', - }) - formats.append(rtsp_format) - http_base_url = rtmp_url.replace('rtmp://', 'http://') - formats.extend(self._extract_m3u8_formats( - http_base_url + '/playlist.m3u8', video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - http_base_url + '/manifest.f4m', - video_id, f4m_id='hds', fatal=False)) - - return { - 'id': video_id, - 
'title': meta['title'].strip(), - 'thumbnail': meta.get('image'), - 'formats': formats - } diff --git a/yt_dlp/extractor/ruhd.py b/yt_dlp/extractor/ruhd.py deleted file mode 100644 index abaa3f9ea..000000000 --- a/yt_dlp/extractor/ruhd.py +++ /dev/null @@ -1,42 +0,0 @@ -from .common import InfoExtractor - - -class RUHDIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)' - _TEST = { - 'url': 'http://www.ruhd.ru/play.php?vid=207', - 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', - 'info_dict': { - 'id': '207', - 'ext': 'divx', - 'title': 'КОТ бааааам', - 'description': 'классный кот)', - 'thumbnail': r're:^http://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_url = self._html_search_regex( - r'<param name="src" value="([^"]+)"', webpage, 'video url') - title = self._html_search_regex( - r'<title>([^<]+) RUHD\.ru - Видео Высокого качества №1 в России!</title>', - webpage, 'title') - description = self._html_search_regex( - r'(?s)<div id="longdesc">(.+?)<span id="showlink">', - webpage, 'description', fatal=False) - thumbnail = self._html_search_regex( - r'<param name="previewImage" value="([^"]+)"', - webpage, 'thumbnail', fatal=False) - if thumbnail: - thumbnail = 'http://www.ruhd.ru' + thumbnail - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - } diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index d839ffcde..9c2ca8c51 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -46,6 +46,7 @@ def is_logged(webpage): class SCTEIE(SCTEBaseIE): + _WORKING = False _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)' _TESTS = [{ 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484', @@ -93,6 +94,7 @@ def _real_extract(self, url): class SCTECourseIE(SCTEBaseIE): + _WORKING = False _VALID_URL = 
r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)' _TESTS = [{ 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491', diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py deleted file mode 100644 index 9a237b320..000000000 --- a/yt_dlp/extractor/shared.py +++ /dev/null @@ -1,138 +0,0 @@ -import urllib.parse - -from .common import InfoExtractor -from ..compat import compat_b64decode -from ..utils import ( - KNOWN_EXTENSIONS, - ExtractorError, - determine_ext, - int_or_none, - js_to_json, - parse_filesize, - rot47, - url_or_none, - urlencode_postdata, -) - - -class SharedBaseIE(InfoExtractor): - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage, urlh = self._download_webpage_handle(url, video_id) - - if self._FILE_NOT_FOUND in webpage: - raise ExtractorError( - 'Video %s does not exist' % video_id, expected=True) - - video_url = self._extract_video_url(webpage, video_id, url) - - title = self._extract_title(webpage) - filesize = int_or_none(self._extract_filesize(webpage)) - - return { - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'filesize': filesize, - 'title': title, - } - - def _extract_title(self, webpage): - return compat_b64decode(self._html_search_meta( - 'full:title', webpage, 'title')).decode('utf-8') - - def _extract_filesize(self, webpage): - return self._html_search_meta( - 'full:size', webpage, 'file size', fatal=False) - - -class SharedIE(SharedBaseIE): - IE_DESC = 'shared.sx' - _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})' - _FILE_NOT_FOUND = '>File does not exist<' - - _TEST = { - 'url': 'http://shared.sx/0060718775', - 'md5': '106fefed92a8a2adb8c98e6a0652f49b', - 'info_dict': { - 'id': '0060718775', - 'ext': 'mp4', - 'title': 'Bmp4', - 'filesize': 1720110, - }, - } - - def _extract_video_url(self, webpage, video_id, url): - download_form = self._hidden_inputs(webpage) - - video_page = self._download_webpage( - url, video_id, 'Downloading video 
page', - data=urlencode_postdata(download_form), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': url, - }) - - video_url = self._html_search_regex( - r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1', - video_page, 'video URL', group='url') - - return video_url - - -class VivoIE(SharedBaseIE): - IE_DESC = 'vivo.sx' - _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})' - _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' - - _TESTS = [{ - 'url': 'http://vivo.sx/d7ddda0e78', - 'md5': '15b3af41be0b4fe01f4df075c2678b2c', - 'info_dict': { - 'id': 'd7ddda0e78', - 'ext': 'mp4', - 'title': 'Chicken', - 'filesize': 515659, - }, - }, { - 'url': 'http://vivo.st/d7ddda0e78', - 'only_matching': True, - }] - - def _extract_title(self, webpage): - title = self._html_search_regex( - r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage, - 'title', default=None, group='title') - if title: - ext = determine_ext(title) - if ext.lower() in KNOWN_EXTENSIONS: - title = title.rpartition('.' 
+ ext)[0] - return title - return self._og_search_title(webpage) - - def _extract_filesize(self, webpage): - return parse_filesize(self._search_regex( - r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)', - webpage, 'filesize', fatal=False)) - - def _extract_video_url(self, webpage, video_id, url): - def decode_url_old(encoded_url): - return compat_b64decode(encoded_url).decode('utf-8') - - stream_url = self._search_regex( - r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'stream url', default=None, group='url') - if stream_url: - stream_url = url_or_none(decode_url_old(stream_url)) - if stream_url: - return stream_url - - def decode_url(encoded_url): - return rot47(urllib.parse.unquote_plus(encoded_url)) - - return decode_url(self._parse_json( - self._search_regex( - r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage, - 'stream'), - video_id, transform_source=js_to_json)['source']) diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py index 0a8b6cc76..574ac219c 100644 --- a/yt_dlp/extractor/sky.py +++ b/yt_dlp/extractor/sky.py @@ -3,9 +3,7 @@ from .common import InfoExtractor from ..utils import ( extract_attributes, - smuggle_url, strip_or_none, - urljoin, ) @@ -13,29 +11,10 @@ class SkyBaseIE(InfoExtractor): BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' _SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)' - def _process_ooyala_element(self, webpage, sdc_el, url): + def _process_video_element(self, webpage, sdc_el, url): sdc = extract_attributes(sdc_el) provider = sdc.get('data-provider') - if provider == 'ooyala': - video_id = sdc['data-sdc-video-id'] - video_url = 'ooyala:%s' % video_id - ie_key = 'Ooyala' - ooyala_el = self._search_regex( - r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id, - webpage, 'video data', fatal=False) - if ooyala_el: - ooyala_attrs = 
extract_attributes(ooyala_el) or {} - if ooyala_attrs.get('data-token-required') == 'true': - token_fetch_url = (self._parse_json(ooyala_attrs.get( - 'data-token-fetch-options', '{}'), - video_id, fatal=False) or {}).get('url') - if token_fetch_url: - embed_token = self._download_json(urljoin( - url, token_fetch_url), video_id, fatal=False) - if embed_token: - video_url = smuggle_url( - video_url, {'embed_token': embed_token}) - elif provider == 'brightcove': + if provider == 'brightcove': video_id = sdc['data-video-id'] account_id = sdc.get('data-account-id') or '6058004172001' player_id = sdc.get('data-player-id') or 'RC9PQUaJ6' @@ -52,7 +31,7 @@ def _process_ooyala_element(self, webpage, sdc_el, url): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - info = self._process_ooyala_element(webpage, self._search_regex( + info = self._process_video_element(webpage, self._search_regex( self._SDC_EL_REGEX, webpage, 'sdc element'), url) info.update({ 'title': self._og_search_title(webpage), @@ -73,7 +52,7 @@ class SkySportsIE(SkyBaseIE): 'title': 'Bale: It\'s our time to shine', 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', }, - 'add_ie': ['Ooyala'], + 'add_ie': ['BrightcoveNew'], }, { 'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook', 'only_matching': True, @@ -122,7 +101,7 @@ def _real_extract(self, url): article_id = self._match_id(url) webpage = self._download_webpage(url, article_id) - entries = [self._process_ooyala_element(webpage, sdc_el, url) + entries = [self._process_video_element(webpage, sdc_el, url) for sdc_el in re.findall(self._SDC_EL_REGEX, webpage)] return self.playlist_result( @@ -149,7 +128,7 @@ def _real_extract(self, url): entries = [] for sdc_el in re.findall(self._SDC_EL_REGEX, webpage): - entries.append(self._process_ooyala_element(webpage, sdc_el, url)) + entries.append(self._process_video_element(webpage, sdc_el, url)) return 
self.playlist_result( entries, article_id, self._og_search_title(webpage), diff --git a/yt_dlp/extractor/spankwire.py b/yt_dlp/extractor/spankwire.py deleted file mode 100644 index 334b29773..000000000 --- a/yt_dlp/extractor/spankwire.py +++ /dev/null @@ -1,174 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - merge_dicts, - str_or_none, - str_to_int, - url_or_none, -) - - -class SpankwireIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?spankwire\.com/ - (?: - [^/]+/video| - EmbedPlayer\.aspx/?\?.*?\bArticleId= - ) - (?P<id>\d+) - ''' - _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)'] - _TESTS = [{ - # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4 - 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', - 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd', - 'info_dict': { - 'id': '103545', - 'ext': 'mp4', - 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', - 'description': 'Crazy Bitch X rated music video.', - 'duration': 222, - 'uploader': 'oreusz', - 'uploader_id': '124697', - 'timestamp': 1178587885, - 'upload_date': '20070508', - 'average_rating': float, - 'view_count': int, - 'comment_count': int, - 'age_limit': 18, - 'categories': list, - 'tags': list, - }, - }, { - # download URL pattern: */mp4_<format_id>_<video_id>.mp4 - 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', - 'md5': '09b3c20833308b736ae8902db2f8d7e6', - 'info_dict': { - 'id': '1921551', - 'ext': 'mp4', - 'title': 'Titcums Compiloation I', - 'description': 'cum on tits', - 'uploader': 'dannyh78999', - 'uploader_id': '3056053', - 'upload_date': '20150822', - 'age_limit': 18, - }, - 'params': { - 'proxy': '127.0.0.1:8118' - }, - 'skip': 'removed', - }, { - 'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true', - 'only_matching': True, - 
}] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id) - - title = video['title'] - - formats = [] - videos = video.get('videos') - if isinstance(videos, dict): - for format_id, format_url in videos.items(): - video_url = url_or_none(format_url) - if not format_url: - continue - height = int_or_none(self._search_regex( - r'(\d+)[pP]', format_id, 'height', default=None)) - m = re.search( - r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url) - if m: - tbr = int(m.group('tbr')) - height = height or int(m.group('height')) - else: - tbr = None - formats.append({ - 'url': video_url, - 'format_id': '%dp' % height if height else format_id, - 'height': height, - 'tbr': tbr, - }) - m3u8_url = url_or_none(video.get('HLS')) - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - - view_count = str_to_int(video.get('viewed')) - - thumbnails = [] - for preference, t in enumerate(('', '2x'), start=0): - thumbnail_url = url_or_none(video.get('poster%s' % t)) - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'preference': preference, - }) - - def extract_names(key): - entries_list = video.get(key) - if not isinstance(entries_list, list): - return - entries = [] - for entry in entries_list: - name = str_or_none(entry.get('name')) - if name: - entries.append(name) - return entries - - categories = extract_names('categories') - tags = extract_names('tags') - - uploader = None - info = {} - - webpage = self._download_webpage( - 'https://www.spankwire.com/_/video%s/' % video_id, video_id, - fatal=False) - if webpage: - info = self._search_json_ld(webpage, video_id, default={}) - thumbnail_url = None - if 'thumbnail' in info: - thumbnail_url = url_or_none(info['thumbnail']) - del info['thumbnail'] - if not thumbnail_url: - thumbnail_url = 
self._og_search_thumbnail(webpage) - if thumbnail_url: - thumbnails.append({ - 'url': thumbnail_url, - 'preference': 10, - }) - uploader = self._html_search_regex( - r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>', - webpage, 'uploader', fatal=False) - if not view_count: - view_count = str_to_int(self._search_regex( - r'data-views=["\']([\d,.]+)', webpage, 'view count', - fatal=False)) - - return merge_dicts({ - 'id': video_id, - 'title': title, - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'thumbnails': thumbnails, - 'uploader': uploader, - 'uploader_id': str_or_none(video.get('userId')), - 'timestamp': int_or_none(video.get('time_approved_on')), - 'average_rating': float_or_none(video.get('rating')), - 'view_count': view_count, - 'comment_count': int_or_none(video.get('comments')), - 'age_limit': 18, - 'categories': categories, - 'tags': tags, - 'formats': formats, - }, info) diff --git a/yt_dlp/extractor/srmediathek.py b/yt_dlp/extractor/srmediathek.py index 3cc39870f..f0b3b585f 100644 --- a/yt_dlp/extractor/srmediathek.py +++ b/yt_dlp/extractor/srmediathek.py @@ -6,6 +6,7 @@ class SRMediathekIE(ARDMediathekBaseIE): + _WORKING = False IE_NAME = 'sr:mediathek' IE_DESC = 'Saarländischer Rundfunk' _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' diff --git a/yt_dlp/extractor/streamcloud.py b/yt_dlp/extractor/streamcloud.py deleted file mode 100644 index 728980921..000000000 --- a/yt_dlp/extractor/streamcloud.py +++ /dev/null @@ -1,75 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - urlencode_postdata, -) - - -class StreamcloudIE(InfoExtractor): - IE_NAME = 'streamcloud.eu' - _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?' 
- - _TESTS = [{ - 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', - 'md5': '6bea4c7fa5daaacc2a946b7146286686', - 'info_dict': { - 'id': 'skp9j99s4bpz', - 'ext': 'mp4', - 'title': 'youtube-dl test video \'/\\ ä ↭', - }, - 'skip': 'Only available from the EU' - }, { - 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - url = 'http://streamcloud.eu/%s' % video_id - - orig_webpage = self._download_webpage(url, video_id) - - if '>File Not Found<' in orig_webpage: - raise ExtractorError( - 'Video %s does not exist' % video_id, expected=True) - - fields = re.findall(r'''(?x)<input\s+ - type="(?:hidden|submit)"\s+ - name="([^"]+)"\s+ - (?:id="[^"]+"\s+)? - value="([^"]*)" - ''', orig_webpage) - - self._sleep(6, video_id) - - webpage = self._download_webpage( - url, video_id, data=urlencode_postdata(fields), headers={ - b'Content-Type': b'application/x-www-form-urlencoded', - }) - - try: - title = self._html_search_regex( - r'<h1[^>]*>([^<]+)<', webpage, 'title') - video_url = self._search_regex( - r'file:\s*"([^"]+)"', webpage, 'video URL') - except ExtractorError: - message = self._html_search_regex( - r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>', - webpage, 'message', default=None, group='message') - if message: - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) - raise - thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - 'thumbnail': thumbnail, - 'http_headers': { - 'Referer': url, - }, - } diff --git a/yt_dlp/extractor/swrmediathek.py b/yt_dlp/extractor/swrmediathek.py deleted file mode 100644 index 38bdfced7..000000000 --- a/yt_dlp/extractor/swrmediathek.py +++ /dev/null @@ -1,111 +0,0 @@ -from .common import 
InfoExtractor -from ..utils import ( - parse_duration, - int_or_none, - determine_protocol, -) - - -class SWRMediathekIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - - _TESTS = [{ - 'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6', - 'md5': '8c5f6f0172753368547ca8413a7768ac', - 'info_dict': { - 'id': '849790d0-dab8-11e3-a953-0026b975f2e6', - 'ext': 'mp4', - 'title': 'SWR odysso', - 'description': 'md5:2012e31baad36162e97ce9eb3f157b8a', - 'thumbnail': r're:^http:.*\.jpg$', - 'duration': 2602, - 'upload_date': '20140515', - 'uploader': 'SWR Fernsehen', - 'uploader_id': '990030', - }, - }, { - 'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6', - 'md5': 'b10ab854f912eecc5a6b55cd6fc1f545', - 'info_dict': { - 'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6', - 'ext': 'mp4', - 'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen', - 'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2', - 'thumbnail': r're:http://.*\.jpg', - 'duration': 5305, - 'upload_date': '20140516', - 'uploader': 'SWR Fernsehen', - 'uploader_id': '990030', - }, - 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink', - }, { - 'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6', - 'md5': '4382e4ef2c9d7ce6852535fa867a0dd3', - 'info_dict': { - 'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6', - 'ext': 'mp3', - 'title': 'Saša Stanišic: Vor dem Fest', - 'description': 'md5:5b792387dc3fbb171eb709060654e8c9', - 'thumbnail': r're:http://.*\.jpg', - 'duration': 3366, - 'upload_date': '20140520', - 'uploader': 'SWR 2', - 'uploader_id': '284670', - }, - 'skip': 'redirect to http://swrmediathek.de/index.htm?hinweis=swrlink', - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 
'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, - video_id, 'Downloading video JSON') - - attr = video['attr'] - title = attr['entry_title'] - media_type = attr.get('entry_etype') - - formats = [] - for entry in video.get('sub', []): - if entry.get('name') != 'entry_media': - continue - - entry_attr = entry.get('attr', {}) - f_url = entry_attr.get('val2') - if not f_url: - continue - codec = entry_attr.get('val0') - if codec == 'm3u8': - formats.extend(self._extract_m3u8_formats( - f_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif codec == 'f4m': - formats.extend(self._extract_f4m_formats( - f_url + '?hdcore=3.7.0', video_id, - f4m_id='hds', fatal=False)) - else: - formats.append({ - 'format_id': determine_protocol({'url': f_url}), - 'url': f_url, - 'quality': int_or_none(entry_attr.get('val1')), - 'vcodec': codec if media_type == 'Video' else 'none', - 'acodec': codec if media_type == 'Audio' else None, - }) - - upload_date = None - entry_pdatet = attr.get('entry_pdatet') - if entry_pdatet: - upload_date = entry_pdatet[:-4] - - return { - 'id': video_id, - 'title': title, - 'description': attr.get('entry_descl'), - 'thumbnail': attr.get('entry_image_16_9'), - 'duration': parse_duration(attr.get('entry_durat')), - 'upload_date': upload_date, - 'uploader': attr.get('channel_title'), - 'uploader_id': attr.get('channel_idkey'), - 'formats': formats, - } diff --git a/yt_dlp/extractor/techtalks.py b/yt_dlp/extractor/techtalks.py deleted file mode 100644 index d37de360b..000000000 --- a/yt_dlp/extractor/techtalks.py +++ /dev/null @@ -1,80 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - get_element_by_attribute, - clean_html, -) - - -class TechTalksIE(InfoExtractor): - _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)' - - _TESTS = [{ - 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', - 'info_dict': { - 'id': '57758', - 'title': 'Learning Topic Models --- 
Going beyond SVD', - }, - 'playlist': [ - { - 'info_dict': { - 'id': '57758', - 'ext': 'flv', - 'title': 'Learning Topic Models --- Going beyond SVD', - }, - }, - { - 'info_dict': { - 'id': '57758-slides', - 'ext': 'flv', - 'title': 'Learning Topic Models --- Going beyond SVD', - }, - }, - ], - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - 'url': 'http://techtalks.tv/talks/57758', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - talk_id = mobj.group('id') - webpage = self._download_webpage(url, talk_id) - rtmp_url = self._search_regex( - r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') - play_path = self._search_regex( - r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', - webpage, 'presenter play path') - title = clean_html(get_element_by_attribute('class', 'title', webpage)) - video_info = { - 'id': talk_id, - 'title': title, - 'url': rtmp_url, - 'play_path': play_path, - 'ext': 'flv', - } - m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage) - if m_slides is None: - return video_info - else: - return { - '_type': 'playlist', - 'id': talk_id, - 'title': title, - 'entries': [ - video_info, - # The slides video - { - 'id': talk_id + '-slides', - 'title': title, - 'url': rtmp_url, - 'play_path': m_slides.group(1), - 'ext': 'flv', - }, - ], - } diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 20bb82420..a3f0c7cda 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -77,7 +77,6 @@ class TelecincoIE(InfoExtractor): 'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html', 'only_matching': True, }, { - # ooyala video 'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html', 'only_matching': True, }] diff --git a/yt_dlp/extractor/tinypic.py b/yt_dlp/extractor/tinypic.py deleted file mode 100644 index 216208cbd..000000000 --- 
a/yt_dlp/extractor/tinypic.py +++ /dev/null @@ -1,54 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class TinyPicIE(InfoExtractor): - IE_NAME = 'tinypic' - IE_DESC = 'tinypic.com videos' - _VALID_URL = r'https?://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+' - - _TESTS = [ - { - 'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8', - 'md5': '609b74432465364e72727ebc6203f044', - 'info_dict': { - 'id': '6xw7tc', - 'ext': 'flv', - 'title': 'shadow phenomenon weird', - }, - }, - { - 'url': 'http://de.tinypic.com/player.php?v=dy90yh&s=8', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id, 'Downloading page') - - mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n' - r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage) - if mobj is None: - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - - file_id = mobj.group('fileid') - server_id = mobj.group('serverid') - - KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting' - keywords = self._html_search_meta('keywords', webpage, 'title') - title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else '' - - video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id) - thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id) - - return { - 'id': file_id, - 'url': video_url, - 'thumbnail': thumbnail, - 'title': title - } diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py deleted file mode 100644 index d022e2753..000000000 --- a/yt_dlp/extractor/tokentube.py +++ /dev/null @@ -1,153 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from ..utils import ( - clean_html, - get_element_by_class, - parse_count, - remove_end, - 
unified_strdate, - js_to_json, - OnDemandPagedList, -) - - -class TokentubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tokentube\.net/(?:view\?[vl]=|[vl]/)(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://tokentube.net/l/3236632011/Praise-A-Thon-Pastori-Chrisin-ja-Pastori-Bennyn-kanssa-27-8-2021', - 'info_dict': { - 'id': '3236632011', - 'ext': 'mp4', - 'title': 'Praise-A-Thon Pastori Chrisin ja Pastori Bennyn kanssa 27.8.2021', - 'description': '', - 'uploader': 'Pastori Chris - Rapsodia.fi', - 'upload_date': '20210827', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://tokentube.net/v/3950239124/Linux-Ubuntu-Studio-perus-k%C3%A4ytt%C3%B6', - 'md5': '0e1f00421f501f5eada9890d38fcfb56', - 'info_dict': { - 'id': '3950239124', - 'ext': 'mp4', - 'title': 'Linux Ubuntu Studio perus käyttö', - 'description': 'md5:46077d0daaba1974f2dc381257f9d64c', - 'uploader': 'jyrilehtonen', - 'upload_date': '20210825', - }, - }, { - 'url': 'https://tokentube.net/view?v=3582463289', - 'info_dict': { - 'id': '3582463289', - 'ext': 'mp4', - 'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??', - 'description': 'md5:37ebf1cb44264e0bf23ed98b337ee63e', - 'uploader': 'Voitontie', - 'upload_date': '20210428', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex(r'<h1\s*class=["\']title-text["\']>(.+?)</h1>', webpage, 'title') - - data_json = self._html_search_regex(r'({["\']html5["\'].+?}}}+)', webpage, 'data json') - data_json = self._parse_json(js_to_json(data_json), video_id, fatal=False) - - sources = data_json.get('sources') or self._parse_json( - self._html_search_regex(r'updateSrc\(([^\)]+)\)', webpage, 'sources'), - video_id, transform_source=js_to_json) - - formats = [{ - 'url': format.get('src'), - 'format_id': format.get('label'), - 'height': format.get('res'), - } for format in sources] - - view_count = 
parse_count(self._html_search_regex( - r'<p\s*class=["\']views_counter["\']>\s*([\d\.,]+)\s*<span>views?</span></p>', - webpage, 'view_count', fatal=False)) - - like_count = parse_count(self._html_search_regex( - r'<div\s*class="sh_button\s*likes_count">\s*(\d+)\s*</div>', - webpage, 'like count', fatal=False)) - - dislike_count = parse_count(self._html_search_regex( - r'<div\s*class="sh_button\s*dislikes_count">\s*(\d+)\s*</div>', - webpage, 'dislike count', fatal=False)) - - upload_date = unified_strdate(self._html_search_regex( - r'<span\s*class="p-date">Published\s*on\s+([^<]+)', - webpage, 'upload date', fatal=False)) - - uploader = self._html_search_regex( - r'<a\s*class="place-left"[^>]+>(.+?)</a>', - webpage, 'uploader', fatal=False) - - description = (clean_html(get_element_by_class('p-d-txt', webpage)) - or self._html_search_meta(('og:description', 'description', 'twitter:description'), webpage)) - - description = remove_end(description, 'Category') - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'upload_date': upload_date, - 'description': description, - 'uploader': uploader, - } - - -class TokentubeChannelIE(InfoExtractor): - _PAGE_SIZE = 20 - IE_NAME = 'Tokentube:channel' - _VALID_URL = r'https?://(?:www\.)?tokentube\.net/channel/(?P<id>\d+)/[^/]+(?:/videos)?' 
- _TESTS = [{ - 'url': 'https://tokentube.net/channel/3697658904/TokenTube', - 'info_dict': { - 'id': '3697658904', - }, - 'playlist_mincount': 7, - }, { - 'url': 'https://tokentube.net/channel/3353234420/Linux/videos', - 'info_dict': { - 'id': '3353234420', - }, - 'playlist_mincount': 20, - }, { - 'url': 'https://tokentube.net/channel/3475834195/Voitontie', - 'info_dict': { - 'id': '3475834195', - }, - 'playlist_mincount': 150, - }] - - def _fetch_page(self, channel_id, page): - page += 1 - videos_info = self._download_webpage( - f'https://tokentube.net/videos?p=0&m=1&sort=recent&u={channel_id}&page={page}', - channel_id, headers={'X-Requested-With': 'XMLHttpRequest'}, - note=f'Downloading page {page}', fatal=False) - if '</i> Sorry, no results were found.' not in videos_info: - for path, media_id in re.findall( - r'<a[^>]+\bhref=["\']([^"\']+/[lv]/(\d+)/\S+)["\'][^>]+>', - videos_info): - yield self.url_result(path, ie=TokentubeIE.ie_key(), video_id=media_id) - - def _real_extract(self, url): - channel_id = self._match_id(url) - - entries = OnDemandPagedList(functools.partial( - self._fetch_page, channel_id), self._PAGE_SIZE) - - return self.playlist_result(entries, channel_id) diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py index bc7336186..aa7ee6c48 100644 --- a/yt_dlp/extractor/toypics.py +++ b/yt_dlp/extractor/toypics.py @@ -3,6 +3,7 @@ class ToypicsIE(InfoExtractor): + _WORKING = False IE_DESC = 'Toypics video' _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)' _TEST = { @@ -43,6 +44,7 @@ def _real_extract(self, url): class ToypicsUserIE(InfoExtractor): + _WORKING = False IE_DESC = 'Toypics user profile' _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)' _TEST = { diff --git a/yt_dlp/extractor/trilulilu.py b/yt_dlp/extractor/trilulilu.py deleted file mode 100644 index fb97be737..000000000 --- a/yt_dlp/extractor/trilulilu.py +++ /dev/null @@ -1,100 +0,0 @@ -from .common import InfoExtractor -from 
..utils import ( - ExtractorError, - int_or_none, - parse_iso8601, -) - - -class TriluliluIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m)\.)?trilulilu\.ro/(?:[^/]+/)?(?P<id>[^/#\?]+)' - _TESTS = [{ - 'url': 'http://www.trilulilu.ro/big-buck-bunny-1', - 'md5': '68da087b676a6196a413549212f60cc6', - 'info_dict': { - 'id': 'ae2899e124140b', - 'ext': 'mp4', - 'title': 'Big Buck Bunny', - 'description': ':) pentru copilul din noi', - 'uploader_id': 'chipy', - 'upload_date': '20120304', - 'timestamp': 1330830647, - 'uploader': 'chipy', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - }, { - 'url': 'http://www.trilulilu.ro/adena-ft-morreti-inocenta', - 'md5': '929dfb8729dc71750463af88bbbbf4a4', - 'info_dict': { - 'id': 'f299710e3c91c5', - 'ext': 'mp4', - 'title': 'Adena ft. Morreti - Inocenta', - 'description': 'pop music', - 'uploader_id': 'VEVOmixt', - 'upload_date': '20151204', - 'uploader': 'VEVOmixt', - 'timestamp': 1449187937, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - media_info = self._download_json('http://m.trilulilu.ro/%s?format=json' % display_id, display_id) - - age_limit = 0 - errors = media_info.get('errors', {}) - if errors.get('friends'): - raise ExtractorError('This video is private.', expected=True) - elif errors.get('geoblock'): - raise ExtractorError('This video is not available in your country.', expected=True) - elif errors.get('xxx_unlogged'): - age_limit = 18 - - media_class = media_info.get('class') - if media_class not in ('video', 'audio'): - raise ExtractorError('not a video or an audio') - - user = media_info.get('user', {}) - - thumbnail = media_info.get('cover_url') - if thumbnail: - thumbnail.format(width='1600', height='1200') - - # TODO: get correct ext for audio files - stream_type = media_info.get('stream_type') - formats = [{ - 'url': media_info['href'], - 'ext': stream_type, - }] - if 
media_info.get('is_hd'): - formats.append({ - 'format_id': 'hd', - 'url': media_info['hrefhd'], - 'ext': stream_type, - }) - if media_class == 'audio': - formats[0]['vcodec'] = 'none' - else: - formats[0]['format_id'] = 'sd' - - return { - 'id': media_info['identifier'].split('|')[1], - 'display_id': display_id, - 'formats': formats, - 'title': media_info['title'], - 'description': media_info.get('description'), - 'thumbnail': thumbnail, - 'uploader_id': user.get('username'), - 'uploader': user.get('fullname'), - 'timestamp': parse_iso8601(media_info.get('published'), ' '), - 'duration': int_or_none(media_info.get('duration')), - 'view_count': int_or_none(media_info.get('count_views')), - 'like_count': int_or_none(media_info.get('count_likes')), - 'comment_count': int_or_none(media_info.get('count_comments')), - 'age_limit': age_limit, - } diff --git a/yt_dlp/extractor/tube8.py b/yt_dlp/extractor/tube8.py index 77ed05ffd..5f15b4581 100644 --- a/yt_dlp/extractor/tube8.py +++ b/yt_dlp/extractor/tube8.py @@ -1,13 +1,20 @@ import re +from .common import InfoExtractor +from ..aes import aes_decrypt_text +from ..compat import compat_urllib_parse_unquote from ..utils import ( + determine_ext, + format_field, int_or_none, str_to_int, + strip_or_none, + url_or_none, ) -from .keezmovies import KeezMoviesIE -class Tube8IE(KeezMoviesIE): # XXX: Do not subclass from concrete IE +class Tube8IE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)' _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)'] _TESTS = [{ @@ -30,6 +37,90 @@ class Tube8IE(KeezMoviesIE): # XXX: Do not subclass from concrete IE 'only_matching': True, }] + def _extract_info(self, url, fatal=True): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + display_id = (mobj.group('display_id') + if 'display_id' in mobj.groupdict() + else None) or mobj.group('id') + + webpage = 
self._download_webpage( + url, display_id, headers={'Cookie': 'age_verified=1'}) + + formats = [] + format_urls = set() + + title = None + thumbnail = None + duration = None + encrypted = False + + def extract_format(format_url, height=None): + format_url = url_or_none(format_url) + if not format_url or not format_url.startswith(('http', '//')): + return + if format_url in format_urls: + return + format_urls.add(format_url) + tbr = int_or_none(self._search_regex( + r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None)) + if not height: + height = int_or_none(self._search_regex( + r'[/_](\d+)[pP][/_]', format_url, 'height', default=None)) + if encrypted: + format_url = aes_decrypt_text( + video_url, title, 32).decode('utf-8') + formats.append({ + 'url': format_url, + 'format_id': format_field(height, None, '%dp'), + 'height': height, + 'tbr': tbr, + }) + + flashvars = self._parse_json( + self._search_regex( + r'flashvars\s*=\s*({.+?});', webpage, + 'flashvars', default='{}'), + display_id, fatal=False) + + if flashvars: + title = flashvars.get('video_title') + thumbnail = flashvars.get('image_url') + duration = int_or_none(flashvars.get('video_duration')) + encrypted = flashvars.get('encrypted') is True + for key, value in flashvars.items(): + mobj = re.search(r'quality_(\d+)[pP]', key) + if mobj: + extract_format(value, int(mobj.group(1))) + video_url = flashvars.get('video_url') + if video_url and determine_ext(video_url, None): + extract_format(video_url) + + video_url = self._html_search_regex( + r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1', + webpage, 'video url', default=None, group='url') + if video_url: + extract_format(compat_urllib_parse_unquote(video_url)) + + if not formats: + if 'title="This video is no longer available"' in webpage: + self.raise_no_formats( + 'Video %s is no longer available' % video_id, expected=True) + + if not title: + title = self._html_search_regex( + r'<h1[^>]*>([^<]+)', webpage, 'title') + + return webpage, { + 'id': 
video_id, + 'display_id': display_id, + 'title': strip_or_none(title), + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': 18, + 'formats': formats, + } + def _real_extract(self, url): webpage, info = self._extract_info(url) diff --git a/yt_dlp/extractor/tunepk.py b/yt_dlp/extractor/tunepk.py deleted file mode 100644 index e4e507b00..000000000 --- a/yt_dlp/extractor/tunepk.py +++ /dev/null @@ -1,87 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - try_get, - unified_timestamp, -) - - -class TunePkIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - (?:www\.)?tune\.pk/(?:video/|player/embed_player.php?.*?\bvid=)| - embed\.tune\.pk/play/ - ) - (?P<id>\d+) - ''' - _TESTS = [{ - 'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins', - 'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55', - 'info_dict': { - 'id': '6919541', - 'ext': 'mp4', - 'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins', - 'description': 'md5:eb5a04114fafef5cec90799a93a2d09c', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1487327564, - 'upload_date': '20170217', - 'uploader': 'Movie Trailers', - 'duration': 107, - 'view_count': int, - } - }, { - 'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no', - 'only_matching': True, - }, { - 'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://tune.pk/video/%s' % video_id, video_id) - - details = self._parse_json( - self._search_regex( - r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'), - video_id)['details'] - - video = details['video'] - title = video.get('title') or self._og_search_title( - webpage, default=None) or self._html_search_meta( - 
'title', webpage, 'title', fatal=True) - - formats = self._parse_jwplayer_formats( - details['player']['sources'], video_id) - - description = self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'description', webpage, 'description') - - thumbnail = video.get('thumb') or self._og_search_thumbnail( - webpage, default=None) or self._html_search_meta( - 'thumbnail', webpage, 'thumbnail') - - timestamp = unified_timestamp(video.get('date_added')) - uploader = try_get( - video, lambda x: x['uploader']['name'], - compat_str) or self._html_search_meta('author', webpage, 'author') - - duration = int_or_none(video.get('duration')) - view_count = int_or_none(video.get('views')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'uploader': uploader, - 'duration': duration, - 'view_count': view_count, - 'formats': formats, - } diff --git a/yt_dlp/extractor/tvnet.py b/yt_dlp/extractor/tvnet.py deleted file mode 100644 index 77426f7e6..000000000 --- a/yt_dlp/extractor/tvnet.py +++ /dev/null @@ -1,138 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unescapeHTML, - url_or_none, -) - - -class TVNetIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)' - _TESTS = [{ - # video - 'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h', - 'md5': 'b4d7abe0252c9b47774760b7519c7558', - 'info_dict': { - 'id': '109788', - 'ext': 'mp4', - 'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang', - 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', - 'is_live': False, - 'view_count': int, - }, - }, { - # audio - 'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi', - 'md5': 'b5875ce9b0a2eecde029216d0e6db2ae', - 'info_dict': { - 'id': '27017', - 'ext': 'm4a', - 'title': 'VOV1 - Bản tin chiều (10/06/2018)', - 
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', - 'is_live': False, - }, - }, { - 'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705', - 'info_dict': { - 'id': '129999', - 'ext': 'mp4', - 'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)', - 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', - 'is_live': False, - }, - 'params': { - 'skip_download': True, - }, - }, { - # live stream - 'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1', - 'info_dict': { - 'id': '1011', - 'ext': 'mp4', - 'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - }, { - # radio live stream - 'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014', - 'info_dict': { - 'id': '1014', - 'ext': 'm4a', - 'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'title', webpage, default=None) or self._search_regex( - r'<title>([^<]+)<', webpage, 'title') - title = re.sub(r'\s*-\s*TV Net\s*$', '', title) - - if '/video/' in url or '/radio/' in url: - is_live = False - elif '/kenh-truyen-hinh/' in url: - is_live = True - else: - is_live = None - - data_file = unescapeHTML(self._search_regex( - r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, - 'data file', group='url')) - - stream_urls = set() - formats = [] - for stream in self._download_json(data_file, video_id): - if not isinstance(stream, dict): - continue - stream_url = url_or_none(stream.get('url')) - 
if stream_url in stream_urls or not stream_url: - continue - stream_urls.add(stream_url) - formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)) - - # better support for radio streams - if title.startswith('VOV'): - for f in formats: - f.update({ - 'ext': 'm4a', - 'vcodec': 'none', - }) - - thumbnail = self._og_search_thumbnail( - webpage, default=None) or unescapeHTML( - self._search_regex( - r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, - 'thumbnail', default=None, group='url')) - - view_count = int_or_none(self._search_regex( - r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>', - webpage, 'view count', default=None)) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'is_live': is_live, - 'view_count': view_count, - 'formats': formats, - } diff --git a/yt_dlp/extractor/tvnow.py b/yt_dlp/extractor/tvnow.py deleted file mode 100644 index 0acc306df..000000000 --- a/yt_dlp/extractor/tvnow.py +++ /dev/null @@ -1,639 +0,0 @@ -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - get_element_by_id, - int_or_none, - parse_iso8601, - parse_duration, - str_or_none, - try_get, - update_url_query, - urljoin, -) - - -class TVNowBaseIE(InfoExtractor): - _VIDEO_FIELDS = ( - 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', - 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode', - 'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear', - 'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo') - - def _call_api(self, path, video_id, query): - return self._download_json( - 'https://api.tvnow.de/v3/' + path, video_id, query=query) - - def _extract_video(self, info, display_id): - video_id = compat_str(info['id']) - title = info['title'] - - paths = [] - for manifest_url in (info.get('manifest') or {}).values(): - if not manifest_url: - 
continue - manifest_url = update_url_query(manifest_url, {'filter': ''}) - path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path') - if path in paths: - continue - paths.append(path) - - def url_repl(proto, suffix): - return re.sub( - r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub( - r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', - '.ism/' + suffix, manifest_url)) - - def make_urls(proto, suffix): - urls = [url_repl(proto, suffix)] - hd_url = urls[0].replace('/manifest/', '/ngvod/') - if hd_url != urls[0]: - urls.append(hd_url) - return urls - - for man_url in make_urls('dash', '.mpd'): - formats = self._extract_mpd_formats( - man_url, video_id, mpd_id='dash', fatal=False) - for man_url in make_urls('hss', 'Manifest'): - formats.extend(self._extract_ism_formats( - man_url, video_id, ism_id='mss', fatal=False)) - for man_url in make_urls('hls', '.m3u8'): - formats.extend(self._extract_m3u8_formats( - man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', - fatal=False)) - if formats: - break - else: - if not self.get_param('allow_unplayable_formats') and info.get('isDrm'): - raise ExtractorError( - 'Video %s is DRM protected' % video_id, expected=True) - if info.get('geoblocked'): - raise self.raise_geo_restricted() - if not info.get('free', True): - raise ExtractorError( - 'Video %s is not available for free' % video_id, expected=True) - - description = info.get('articleLong') or info.get('articleShort') - timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') - duration = parse_duration(info.get('duration')) - - f = info.get('format', {}) - - thumbnails = [{ - 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id, - }] - thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') - if thumbnail: - thumbnails.append({ - 'url': thumbnail, - }) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'timestamp': timestamp, - 
'duration': duration, - 'series': f.get('title'), - 'season_number': int_or_none(info.get('season')), - 'episode_number': int_or_none(info.get('episode')), - 'episode': title, - 'formats': formats, - } - - -class TVNowIE(TVNowBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/ - (?P<show_id>[^/]+)/ - (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+) - ''' - - @classmethod - def suitable(cls, url): - return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url) - else super(TVNowIE, cls).suitable(url)) - - _TESTS = [{ - 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player', - 'info_dict': { - 'id': '331082', - 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', - 'ext': 'mp4', - 'title': 'Der neue Porsche 911 GT 3', - 'description': 'md5:6143220c661f9b0aae73b245e5d898bb', - 'timestamp': 1495994400, - 'upload_date': '20170528', - 'duration': 5283, - 'series': 'GRIP - Das Motormagazin', - 'season_number': 14, - 'episode_number': 405, - 'episode': 'Der neue Porsche 911 GT 3', - }, - }, { - # rtl2 - 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player', - 'only_matching': True, - }, { - # rtlnitro - 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player', - 'only_matching': True, - }, { - # superrtl - 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player', - 'only_matching': True, - }, { - # ntv - 'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player', - 'only_matching': True, - }, { - # vox - 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player', - 'only_matching': True, - }, { - # rtlplus - 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player', - 'only_matching': True, - }, { - 'url': 
'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = '%s/%s' % mobj.group(2, 3) - - info = self._call_api( - 'movies/' + display_id, display_id, query={ - 'fields': ','.join(self._VIDEO_FIELDS), - }) - - return self._extract_video(info, display_id) - - -class TVNowNewIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?P<base_url>https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/ - (?:shows|serien))/ - (?P<show>[^/]+)-\d+/ - [^/]+/ - episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+) - ''' - - _TESTS = [{ - 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url')) - show, episode = mobj.group('show', 'episode') - return self.url_result( - # Rewrite new URLs to the old format and use extraction via old API - # at api.tvnow.de as a loophole for bypassing premium content checks - '%s/%s/%s' % (base_url, show, episode), - ie=TVNowIE.ie_key(), video_id=mobj.group('id')) - - -class TVNowFilmIE(TVNowBaseIE): - _VALID_URL = r'''(?x) - (?P<base_url>https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/ - (?:filme))/ - (?P<title>[^/?$&]+)-(?P<id>\d+) - ''' - _TESTS = [{ - 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959', - 'info_dict': { - 'id': '1426690', - 'display_id': 'lord-of-war-haendler-des-todes', - 'ext': 'mp4', - 'title': 'Lord of War', - 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9', - 'timestamp': 1550010000, - 'upload_date': '20190212', - 'duration': 7016, - }, - }, { - 'url': 'https://www.tvnow.de/filme/the-machinist-12157', - 'info_dict': { - 'id': '328160', - 'display_id': 'the-machinist', - 'ext': 'mp4', - 'title': 'The Machinist', - 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28', - 
'timestamp': 1496469720, - 'upload_date': '20170603', - 'duration': 5836, - }, - }, { - 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777', - 'only_matching': True, # DRM protected - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = mobj.group('title') - - webpage = self._download_webpage(url, display_id, fatal=False) - if not webpage: - raise ExtractorError('Cannot download "%s"' % url, expected=True) - - json_text = get_element_by_id('now-web-state', webpage) - if not json_text: - raise ExtractorError('Cannot read video data', expected=True) - - json_data = self._parse_json( - json_text, - display_id, - transform_source=lambda x: x.replace('&q;', '"'), - fatal=False) - if not json_data: - raise ExtractorError('Cannot read video data', expected=True) - - player_key = next( - (key for key in json_data.keys() if 'module/player' in key), - None) - page_key = next( - (key for key in json_data.keys() if 'page/filme' in key), - None) - movie_id = try_get( - json_data, - [ - lambda x: x[player_key]['body']['id'], - lambda x: x[page_key]['body']['modules'][0]['id'], - lambda x: x[page_key]['body']['modules'][1]['id']], - int) - if not movie_id: - raise ExtractorError('Cannot extract movie ID', expected=True) - - info = self._call_api( - 'movies/%d' % movie_id, - display_id, - query={'fields': ','.join(self._VIDEO_FIELDS)}) - - return self._extract_video(info, display_id) - - -class TVNowNewBaseIE(InfoExtractor): - def _call_api(self, path, video_id, query={}): - result = self._download_json( - 'https://apigw.tvnow.de/module/' + path, video_id, query=query) - error = result.get('error') - if error: - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error), expected=True) - return result - - -r""" -TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it -when api.tvnow.de is shut down. This version can't bypass premium checks though. 
-class TVNowIE(TVNowNewBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/ - (?:shows|serien)/[^/]+/ - (?:[^/]+/)+ - (?P<display_id>[^/?$&]+)-(?P<id>\d+) - ''' - - _TESTS = [{ - # episode with annual navigation - 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', - 'info_dict': { - 'id': '331082', - 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', - 'ext': 'mp4', - 'title': 'Der neue Porsche 911 GT 3', - 'description': 'md5:6143220c661f9b0aae73b245e5d898bb', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1495994400, - 'upload_date': '20170528', - 'duration': 5283, - 'series': 'GRIP - Das Motormagazin', - 'season_number': 14, - 'episode_number': 405, - 'episode': 'Der neue Porsche 911 GT 3', - }, - }, { - # rtl2, episode with season navigation - 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124', - 'only_matching': True, - }, { - # rtlnitro - 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822', - 'only_matching': True, - }, { - # superrtl - 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120', - 'only_matching': True, - }, { - # ntv - 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630', - 'only_matching': True, - }, { - # vox - 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072', - 'only_matching': True, - }, { - 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', - 'only_matching': True, - }] - - def _extract_video(self, info, url, display_id): - config = info['config'] - source = config['source'] - - video_id = 
compat_str(info.get('id') or source['videoId']) - title = source['title'].strip() - - paths = [] - for manifest_url in (info.get('manifest') or {}).values(): - if not manifest_url: - continue - manifest_url = update_url_query(manifest_url, {'filter': ''}) - path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path') - if path in paths: - continue - paths.append(path) - - def url_repl(proto, suffix): - return re.sub( - r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub( - r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', - '.ism/' + suffix, manifest_url)) - - formats = self._extract_mpd_formats( - url_repl('dash', '.mpd'), video_id, - mpd_id='dash', fatal=False) - formats.extend(self._extract_ism_formats( - url_repl('hss', 'Manifest'), - video_id, ism_id='mss', fatal=False)) - formats.extend(self._extract_m3u8_formats( - url_repl('hls', '.m3u8'), video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - if formats: - break - else: - if try_get(info, lambda x: x['rights']['isDrm']): - raise ExtractorError( - 'Video %s is DRM protected' % video_id, expected=True) - if try_get(config, lambda x: x['boards']['geoBlocking']['block']): - raise self.raise_geo_restricted() - if not info.get('free', True): - raise ExtractorError( - 'Video %s is not available for free' % video_id, expected=True) - - description = source.get('description') - thumbnail = url_or_none(source.get('poster')) - timestamp = unified_timestamp(source.get('previewStart')) - duration = parse_duration(source.get('length')) - - series = source.get('format') - season_number = int_or_none(self._search_regex( - r'staffel-(\d+)', url, 'season number', default=None)) - episode_number = int_or_none(self._search_regex( - r'episode-(\d+)', url, 'episode number', default=None)) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'series': series, - 'season_number': 
season_number, - 'episode_number': episode_number, - 'episode': title, - 'formats': formats, - } - - def _real_extract(self, url): - display_id, video_id = self._match_valid_url(url).groups() - info = self._call_api('player/' + video_id, video_id) - return self._extract_video(info, video_id, display_id) - - -class TVNowFilmIE(TVNowIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'''(?x) - (?P<base_url>https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/ - (?:filme))/ - (?P<title>[^/?$&]+)-(?P<id>\d+) - ''' - _TESTS = [{ - 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959', - 'info_dict': { - 'id': '1426690', - 'display_id': 'lord-of-war-haendler-des-todes', - 'ext': 'mp4', - 'title': 'Lord of War', - 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9', - 'timestamp': 1550010000, - 'upload_date': '20190212', - 'duration': 7016, - }, - }, { - 'url': 'https://www.tvnow.de/filme/the-machinist-12157', - 'info_dict': { - 'id': '328160', - 'display_id': 'the-machinist', - 'ext': 'mp4', - 'title': 'The Machinist', - 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28', - 'timestamp': 1496469720, - 'upload_date': '20170603', - 'duration': 5836, - }, - }, { - 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777', - 'only_matching': True, # DRM protected - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = mobj.group('title') - - webpage = self._download_webpage(url, display_id, fatal=False) - if not webpage: - raise ExtractorError('Cannot download "%s"' % url, expected=True) - - json_text = get_element_by_id('now-web-state', webpage) - if not json_text: - raise ExtractorError('Cannot read video data', expected=True) - - json_data = self._parse_json( - json_text, - display_id, - transform_source=lambda x: x.replace('&q;', '"'), - fatal=False) - if not json_data: - raise ExtractorError('Cannot read video data', expected=True) - - player_key = next( - (key for key in json_data.keys() if 
'module/player' in key), - None) - page_key = next( - (key for key in json_data.keys() if 'page/filme' in key), - None) - movie_id = try_get( - json_data, - [ - lambda x: x[player_key]['body']['id'], - lambda x: x[page_key]['body']['modules'][0]['id'], - lambda x: x[page_key]['body']['modules'][1]['id']], - int) - if not movie_id: - raise ExtractorError('Cannot extract movie ID', expected=True) - - info = self._call_api('player/%d' % movie_id, display_id) - return self._extract_video(info, url, display_id) -""" - - -class TVNowListBaseIE(TVNowNewBaseIE): - _SHOW_VALID_URL = r'''(?x) - (?P<base_url> - https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/ - [^/?#&]+-(?P<show_id>\d+) - ) - ''' - - @classmethod - def suitable(cls, url): - return (False if TVNowNewIE.suitable(url) - else super(TVNowListBaseIE, cls).suitable(url)) - - def _extract_items(self, url, show_id, list_id, query): - items = self._call_api( - 'teaserrow/format/episode/' + show_id, list_id, - query=query)['items'] - - entries = [] - for item in items: - if not isinstance(item, dict): - continue - item_url = urljoin(url, item.get('url')) - if not item_url: - continue - video_id = str_or_none(item.get('id') or item.get('videoId')) - item_title = item.get('subheadline') or item.get('text') - entries.append(self.url_result( - item_url, ie=TVNowNewIE.ie_key(), video_id=video_id, - video_title=item_title)) - - return self.playlist_result(entries, '%s/%s' % (show_id, list_id)) - - -class TVNowSeasonIE(TVNowListBaseIE): - _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL - _TESTS = [{ - 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13', - 'info_dict': { - 'id': '1815/13', - }, - 'playlist_mincount': 22, - }] - - def _real_extract(self, url): - _, show_id, season_id = self._match_valid_url(url).groups() - return self._extract_items( - url, show_id, season_id, {'season': season_id}) - - -class TVNowAnnualIE(TVNowListBaseIE): - 
_VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL - _TESTS = [{ - 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05', - 'info_dict': { - 'id': '1669/2017-05', - }, - 'playlist_mincount': 2, - }] - - def _real_extract(self, url): - _, show_id, year, month = self._match_valid_url(url).groups() - return self._extract_items( - url, show_id, '%s-%s' % (year, month), { - 'year': int(year), - 'month': int(month), - }) - - -class TVNowShowIE(TVNowListBaseIE): - _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL - _TESTS = [{ - # annual navigationType - 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669', - 'info_dict': { - 'id': '1669', - }, - 'playlist_mincount': 73, - }, { - # season navigationType - 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471', - 'info_dict': { - 'id': '11471', - }, - 'playlist_mincount': 3, - }] - - @classmethod - def suitable(cls, url): - return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) - else super(TVNowShowIE, cls).suitable(url)) - - def _real_extract(self, url): - base_url, show_id = self._match_valid_url(url).groups() - - result = self._call_api( - 'teaserrow/format/navigation/' + show_id, show_id) - - items = result['items'] - - entries = [] - navigation = result.get('navigationType') - if navigation == 'annual': - for item in items: - if not isinstance(item, dict): - continue - year = int_or_none(item.get('year')) - if year is None: - continue - months = item.get('months') - if not isinstance(months, list): - continue - for month_dict in months: - if not isinstance(month_dict, dict) or not month_dict: - continue - month_number = int_or_none(list(month_dict.keys())[0]) - if month_number is None: - continue - entries.append(self.url_result( - '%s/%04d-%02d' % (base_url, year, month_number), - ie=TVNowAnnualIE.ie_key())) - elif navigation == 'season': - for item in items: - if not isinstance(item, dict): - continue - 
season_number = int_or_none(item.get('season')) - if season_number is None: - continue - entries.append(self.url_result( - '%s/staffel-%d' % (base_url, season_number), - ie=TVNowSeasonIE.ie_key())) - else: - raise ExtractorError('Unknown navigationType') - - return self.playlist_result(entries, show_id) diff --git a/yt_dlp/extractor/twentyfourvideo.py b/yt_dlp/extractor/twentyfourvideo.py deleted file mode 100644 index baeb85d47..000000000 --- a/yt_dlp/extractor/twentyfourvideo.py +++ /dev/null @@ -1,128 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - parse_iso8601, - int_or_none, - xpath_attr, - xpath_element, -) - - -class TwentyFourVideoIE(InfoExtractor): - IE_NAME = '24video' - _VALID_URL = r'''(?x) - https?:// - (?P<host> - (?:(?:www|porno?)\.)?24video\. - (?:net|me|xxx|sexy?|tube|adult|site|vip) - )/ - (?: - video/(?:(?:view|xml)/)?| - player/new24_play\.swf\?id= - ) - (?P<id>\d+) - ''' - - _TESTS = [{ - 'url': 'http://www.24video.net/video/view/1044982', - 'md5': 'e09fc0901d9eaeedac872f154931deeb', - 'info_dict': { - 'id': '1044982', - 'ext': 'mp4', - 'title': 'Эротика каменного века', - 'description': 'Как смотрели порно в каменном веке.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'SUPERTELO', - 'duration': 31, - 'timestamp': 1275937857, - 'upload_date': '20100607', - 'age_limit': 18, - 'like_count': int, - 'dislike_count': int, - }, - }, { - 'url': 'http://www.24video.net/player/new24_play.swf?id=1044982', - 'only_matching': True, - }, { - 'url': 'http://www.24video.me/video/view/1044982', - 'only_matching': True, - }, { - 'url': 'http://www.24video.tube/video/view/2363750', - 'only_matching': True, - }, { - 'url': 'https://www.24video.site/video/view/2640421', - 'only_matching': True, - }, { - 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle', - 'only_matching': True, - }, { - 'url': 'https://www.24video.vip/video/view/1044982', - 'only_matching': True, - }, { - 'url': 
'https://porn.24video.net/video/2640421-vsya-takay', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - host = mobj.group('host') - - webpage = self._download_webpage( - 'http://%s/video/view/%s' % (host, video_id), video_id) - - title = self._og_search_title(webpage) - description = self._html_search_regex( - r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>', - webpage, 'description', fatal=False, group='description') - thumbnail = self._og_search_thumbnail(webpage) - duration = int_or_none(self._og_search_property( - 'duration', webpage, 'duration', fatal=False)) - timestamp = parse_iso8601(self._search_regex( - r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"', - webpage, 'upload date', fatal=False)) - - uploader = self._html_search_regex( - r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>', - webpage, 'uploader', fatal=False) - - view_count = int_or_none(self._html_search_regex( - r'<span class="video-views">(\d+) просмотр', - webpage, 'view count', fatal=False)) - comment_count = int_or_none(self._html_search_regex( - r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари', - webpage, 'comment count', default=None)) - - # Sets some cookies - self._download_xml( - r'http://%s/video/xml/%s?mode=init' % (host, video_id), - video_id, 'Downloading init XML') - - video_xml = self._download_xml( - 'http://%s/video/xml/%s?mode=play' % (host, video_id), - video_id, 'Downloading video XML') - - video = xpath_element(video_xml, './/video', 'video', fatal=True) - - formats = [{ - 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True), - }] - - like_count = int_or_none(video.get('ratingPlus')) - dislike_count = int_or_none(video.get('ratingMinus')) - age_limit = 18 if video.get('adult') == 'true' else 0 - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': 
uploader, - 'duration': duration, - 'timestamp': timestamp, - 'view_count': view_count, - 'comment_count': comment_count, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'age_limit': age_limit, - 'formats': formats, - } diff --git a/yt_dlp/extractor/unscripted.py b/yt_dlp/extractor/unscripted.py deleted file mode 100644 index 6643a71b1..000000000 --- a/yt_dlp/extractor/unscripted.py +++ /dev/null @@ -1,53 +0,0 @@ -from .common import InfoExtractor -from ..utils import parse_duration, traverse_obj - - -class UnscriptedNewsVideoIE(InfoExtractor): - _VALID_URL = r'https?://www\.unscripted\.news/videos/(?P<id>[\w-]+)' - _TESTS = [{ - 'url': 'https://www.unscripted.news/videos/a-day-at-the-farmers-protest', - 'info_dict': { - 'id': '60c0a55cd1e99b1079918a57', - 'display_id': 'a-day-at-the-farmers-protest', - 'ext': 'mp4', - 'title': 'A Day at the Farmers\' Protest', - 'description': 'md5:4b3df22747a03e8f14f746dd72190384', - 'thumbnail': 'https://s3.unscripted.news/anj2/60c0a55cd1e99b1079918a57/5f199a65-c803-4a5c-8fce-2077359c3b72.jpg', - 'duration': 2251.0, - 'series': 'Ground Reports', - } - }, { - 'url': 'https://www.unscripted.news/videos/you-get-the-politicians-you-deserve-ft-shashi-tharoor', - 'info_dict': { - 'id': '5fb3afbf18ac817d341a74d8', - 'display_id': 'you-get-the-politicians-you-deserve-ft-shashi-tharoor', - 'ext': 'mp4', - 'cast': ['Avalok Langer', 'Ashwin Mehta'], - 'thumbnail': 'https://s3.unscripted.news/anj2/5fb3afbf18ac817d341a74d8/82bd7942-4f20-4cd8-98ae-83f9e814f998.jpg', - 'description': 'md5:1e91b069238a705ca3a40f87e6f1182c', - 'duration': 1046.0, - 'series': 'Dumb Questions Only', - 'title': 'You Get The Politicians You Deserve! ft. 
Shashi Tharoor', - } - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['dataLocal'] - - # TODO: get subtitle from srt key - formats, subtitles = self._extract_m3u8_formats_and_subtitles(nextjs_data['alt_content'], display_id) - - return { - 'id': nextjs_data['_id'], - 'display_id': display_id, - 'title': nextjs_data.get('title') or self._og_search_title(webpage), - 'description': nextjs_data.get('sh_heading') or self._og_search_description(webpage), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnail': self._og_search_thumbnail(webpage), - 'duration': parse_duration(nextjs_data.get('duration')), - 'series': traverse_obj(nextjs_data, ('show', 'topic')), - 'cast': traverse_obj(nextjs_data, ('cast_crew', ..., 'displayname')), - } diff --git a/yt_dlp/extractor/veehd.py b/yt_dlp/extractor/veehd.py deleted file mode 100644 index 5ecd88726..000000000 --- a/yt_dlp/extractor/veehd.py +++ /dev/null @@ -1,116 +0,0 @@ -import re -import json - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urlparse, -) -from ..utils import ( - ExtractorError, - clean_html, - get_element_by_id, -) - - -class VeeHDIE(InfoExtractor): - _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)' - - # Seems VeeHD videos have multiple copies on several servers, all of - # whom have different MD5 checksums, so omit md5 field in all tests - _TESTS = [{ - 'url': 'http://veehd.com/video/4639434_Solar-Sinter', - 'info_dict': { - 'id': '4639434', - 'ext': 'mp4', - 'title': 'Solar Sinter', - 'uploader_id': 'VideoEyes', - 'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', - }, - 'skip': 'Video deleted', - }, { - 'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling', - 'info_dict': { - 'id': '4905758', - 'ext': 'mp4', - 'title': 'Elysian Fields - Channeling', - 'description': 
'md5:360e4e95fdab58aefbea0f2a19e5604b', - 'uploader_id': 'spotted', - } - }, { - 'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer', - 'info_dict': { - 'id': '2046729', - 'ext': 'avi', - 'title': '2012 (2009) DivX Trailer', - 'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b', - 'uploader_id': 'Movie_Trailers', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - # VeeHD seems to send garbage on the first request. - # See https://github.com/ytdl-org/youtube-dl/issues/2102 - self._download_webpage(url, video_id, 'Requesting webpage') - webpage = self._download_webpage(url, video_id) - - if 'This video has been removed<' in webpage: - raise ExtractorError('Video %s has been removed' % video_id, expected=True) - - player_path = self._search_regex( - r'\$\("#playeriframe"\).attr\({src : "(.+?)"', - webpage, 'player path') - player_url = compat_urlparse.urljoin(url, player_path) - - self._download_webpage(player_url, video_id, 'Requesting player page') - player_page = self._download_webpage( - player_url, video_id, 'Downloading player page') - - video_url = None - - config_json = self._search_regex( - r'value=\'config=({.+?})\'', player_page, 'config json', default=None) - - if config_json: - config = json.loads(config_json) - video_url = compat_urllib_parse_unquote(config['clip']['url']) - - if not video_url: - video_url = self._html_search_regex( - r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"', - player_page, 'video url', default=None) - - if not video_url: - iframe_src = self._search_regex( - r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url') - iframe_url = 'http://veehd.com/%s' % iframe_src - - self._download_webpage(iframe_url, video_id, 'Requesting iframe page') - iframe_page = self._download_webpage( - iframe_url, video_id, 'Downloading iframe page') - - video_url = self._search_regex( - r"file\s*:\s*'([^']+)'", iframe_page, 'video url') - - title = clean_html(get_element_by_id('videoName', 
webpage).rpartition('|')[0]) - uploader_id = self._html_search_regex( - r'<a href="/profile/\d+">(.+?)</a>', - webpage, 'uploader') - thumbnail = self._search_regex( - r'<img id="veehdpreview" src="(.+?)"', - webpage, 'thumbnail') - description = self._html_search_regex( - r'<td class="infodropdown".*?<div>(.*?)<ul', - webpage, 'description', flags=re.DOTALL) - - return { - '_type': 'video', - 'id': video_id, - 'title': title, - 'url': video_url, - 'uploader_id': uploader_id, - 'thumbnail': thumbnail, - 'description': description, - } diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index 8a7126853..1a2d667e7 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -302,12 +302,6 @@ def _url_res(video_url, ie_key): if vice_url: return _url_res(vice_url, ViceIE.ie_key()) - embed_code = self._search_regex( - r'embedCode=([^&\'"]+)', body, - 'ooyala embed code', default=None) - if embed_code: - return _url_res('ooyala:%s' % embed_code, 'Ooyala') - youtube_url = YoutubeIE._extract_url(body) if youtube_url: return _url_res(youtube_url, YoutubeIE.ie_key()) diff --git a/yt_dlp/extractor/vidbit.py b/yt_dlp/extractor/vidbit.py deleted file mode 100644 index 2813032db..000000000 --- a/yt_dlp/extractor/vidbit.py +++ /dev/null @@ -1,82 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - js_to_json, - remove_end, - unified_strdate, -) - - -class VidbitIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)' - _TESTS = [{ - 'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2', - 'md5': '1a34b7f14defe3b8fafca9796892924d', - 'info_dict': { - 'id': 'jkL2yDOEq2', - 'ext': 'mp4', - 'title': 'Intro to VidBit', - 'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7', - 'thumbnail': r're:https?://.*\.jpg$', - 'upload_date': '20160618', - 'view_count': int, - 'comment_count': int, - } - }, { - 'url': 
'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id) - - video_url, title = [None] * 2 - - config = self._parse_json(self._search_regex( - r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'), - video_id, transform_source=js_to_json) - if config: - if config.get('file'): - video_url = compat_urlparse.urljoin(url, config['file']) - title = config.get('title') - - if not video_url: - video_url = compat_urlparse.urljoin(url, self._search_regex( - r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', - webpage, 'video URL', group='url')) - - if not title: - title = remove_end( - self._html_search_regex( - (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'), - webpage, 'title', default=None) or self._og_search_title(webpage), - ' - VidBit') - - description = self._html_search_meta( - ('description', 'og:description', 'twitter:description'), - webpage, 'description') - - upload_date = unified_strdate(self._html_search_meta( - 'datePublished', webpage, 'upload date')) - - view_count = int_or_none(self._search_regex( - r'<strong>(\d+)</strong> views', - webpage, 'view count', fatal=False)) - comment_count = int_or_none(self._search_regex( - r'id=["\']cmt_num["\'][^>]*>\((\d+)\)', - webpage, 'comment count', fatal=False)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': self._og_search_thumbnail(webpage), - 'upload_date': upload_date, - 'view_count': view_count, - 'comment_count': comment_count, - } diff --git a/yt_dlp/extractor/vimple.py b/yt_dlp/extractor/vimple.py deleted file mode 100644 index fdccf465e..000000000 --- a/yt_dlp/extractor/vimple.py +++ /dev/null @@ -1,58 +0,0 @@ -from .common import InfoExtractor -from ..utils import int_or_none - - -class SprutoBaseIE(InfoExtractor): - def _extract_spruto(self, 
spruto, video_id): - playlist = spruto['playlist'][0] - title = playlist['title'] - video_id = playlist.get('videoId') or video_id - thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl') - duration = int_or_none(playlist.get('duration')) - - formats = [{ - 'url': f['url'], - } for f in playlist['video']] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } - - -class VimpleIE(SprutoBaseIE): - IE_DESC = 'Vimple - one-click video hosting' - _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P<id>[\da-f-]{32,36})' - _TESTS = [{ - 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', - 'md5': '2e750a330ed211d3fd41821c6ad9a279', - 'info_dict': { - 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', - 'ext': 'mp4', - 'title': 'Sunset', - 'duration': 20, - 'thumbnail': r're:https?://.*?\.jpg', - }, - }, { - 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', - 'only_matching': True, - }, { - 'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://player.vimple.ru/iframe/%s' % video_id, video_id) - - spruto = self._parse_json( - self._search_regex( - r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'), - video_id) - - return self._extract_spruto(spruto, video_id) diff --git a/yt_dlp/extractor/vodlocker.py b/yt_dlp/extractor/vodlocker.py deleted file mode 100644 index b215d6c9d..000000000 --- a/yt_dlp/extractor/vodlocker.py +++ /dev/null @@ -1,73 +0,0 @@ -from .common import InfoExtractor -from ..networking import Request -from ..utils import NO_DEFAULT, ExtractorError, urlencode_postdata - - -class VodlockerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' 
- - _TESTS = [{ - 'url': 'http://vodlocker.com/e8wvyzz4sl42', - 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', - 'info_dict': { - 'id': 'e8wvyzz4sl42', - 'ext': 'mp4', - 'title': 'Germany vs Brazil', - 'thumbnail': r're:http://.*\.jpg', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - if any(p in webpage for p in ( - '>THIS FILE WAS DELETED<', - '>File Not Found<', - 'The file you were looking for could not be found, sorry for any inconvenience.<', - '>The file was removed')): - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - - fields = self._hidden_inputs(webpage) - - if fields['op'] == 'download1': - self._sleep(3, video_id) # they do detect when requests happen too fast! - post = urlencode_postdata(fields) - req = Request(url, post) - req.headers['Content-type'] = 'application/x-www-form-urlencoded' - webpage = self._download_webpage( - req, video_id, 'Downloading video page') - - def extract_file_url(html, default=NO_DEFAULT): - return self._search_regex( - r'file:\s*"(http[^\"]+)",', html, 'file url', default=default) - - video_url = extract_file_url(webpage, default=None) - - if not video_url: - embed_url = self._search_regex( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1', - webpage, 'embed url', group='url') - embed_webpage = self._download_webpage( - embed_url, video_id, 'Downloading embed webpage') - video_url = extract_file_url(embed_webpage) - thumbnail_webpage = embed_webpage - else: - thumbnail_webpage = webpage - - title = self._search_regex( - r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title') - thumbnail = self._search_regex( - r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False) - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - }] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } diff --git 
a/yt_dlp/extractor/voicerepublic.py b/yt_dlp/extractor/voicerepublic.py deleted file mode 100644 index 47502afb4..000000000 --- a/yt_dlp/extractor/voicerepublic.py +++ /dev/null @@ -1,59 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - determine_ext, - int_or_none, - urljoin, -) - - -class VoiceRepublicIE(InfoExtractor): - _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)' - _TESTS = [{ - 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', - 'md5': 'b9174d651323f17783000876347116e3', - 'info_dict': { - 'id': '2296', - 'display_id': 'watching-the-watchers-building-a-sousveillance-state', - 'ext': 'm4a', - 'title': 'Watching the Watchers: Building a Sousveillance State', - 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.', - 'duration': 1556, - 'view_count': int, - } - }, { - 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - if '>Queued for processing, please stand by...<' in webpage: - raise ExtractorError( - 'Audio is still queued for processing', expected=True) - - talk = self._parse_json(self._search_regex( - r'initialSnapshot\s*=\s*({.+?});', - webpage, 'talk'), display_id)['talk'] - title = talk['title'] - formats = [{ - 'url': urljoin(url, talk_url), - 'format_id': format_id, - 'ext': determine_ext(talk_url) or format_id, - 'vcodec': 'none', - } for format_id, talk_url in talk['media_links'].items()] - - return { - 'id': compat_str(talk.get('id') or display_id), - 'display_id': display_id, - 'title': title, - 'description': talk.get('teaser'), - 'thumbnail': talk.get('image_url'), - 'duration': 
int_or_none(talk.get('archived_duration')), - 'view_count': int_or_none(talk.get('play_count')), - 'formats': formats, - } diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py index b19a27934..ef77bedd2 100644 --- a/yt_dlp/extractor/voot.py +++ b/yt_dlp/extractor/voot.py @@ -81,6 +81,7 @@ def _real_initialize(self): class VootIE(VootBaseIE): + _WORKING = False _VALID_URL = r'''(?x) (?: voot:| @@ -169,6 +170,7 @@ def _real_extract(self, url): class VootSeriesIE(VootBaseIE): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})' _TESTS = [{ 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002', diff --git a/yt_dlp/extractor/voxmedia.py b/yt_dlp/extractor/voxmedia.py index f9362002f..f36908754 100644 --- a/yt_dlp/extractor/voxmedia.py +++ b/yt_dlp/extractor/voxmedia.py @@ -51,7 +51,7 @@ def _real_extract(self, url): info['duration'] = int_or_none(asset.get('duration')) return info - for provider_video_type in ('ooyala', 'youtube', 'brightcove'): + for provider_video_type in ('youtube', 'brightcove'): provider_video_id = video_data.get('%s_id' % provider_video_type) if not provider_video_id: continue @@ -177,7 +177,6 @@ def _real_extract(self, url): def create_entry(provider_video_id, provider_video_type, title=None, description=None): video_url = { 'youtube': '%s', - 'ooyala': 'ooyala:%s', 'volume': 'http://volume.vox-cdn.com/embed/%s', }[provider_video_type] % provider_video_id return { @@ -205,11 +204,6 @@ def create_entry(provider_video_id, provider_video_type, title=None, description provider_video_id, provider_video_type, video_data.get('title'), video_data.get('description'))) - provider_video_id = self._search_regex( - r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None) - if provider_video_id: - entries.append(create_entry(provider_video_id, 'ooyala')) - volume_uuid = self._search_regex( r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None) if volume_uuid: diff --git 
a/yt_dlp/extractor/vrak.py b/yt_dlp/extractor/vrak.py deleted file mode 100644 index 198c0a294..000000000 --- a/yt_dlp/extractor/vrak.py +++ /dev/null @@ -1,77 +0,0 @@ -import re - -from .common import InfoExtractor -from .brightcove import BrightcoveNewIE -from ..utils import ( - int_or_none, - parse_age_limit, - smuggle_url, - unescapeHTML, -) - - -class VrakIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P<id>[\d.]+)' - _TEST = { - 'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721', - 'info_dict': { - 'id': '5345661243001', - 'ext': 'mp4', - 'title': 'Obésité, film de hockey et Roseline Filion', - 'timestamp': 1488492126, - 'upload_date': '20170302', - 'uploader_id': '2890187628001', - 'creator': 'VRAK.TV', - 'age_limit': 8, - 'series': 'ALT (Actualité Légèrement Tordue)', - 'episode': 'Obésité, film de hockey et Roseline Filion', - 'tags': list, - }, - 'params': { - 'skip_download': True, - }, - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r'<h\d\b[^>]+\bclass=["\']videoTitle["\'][^>]*>([^<]+)', - webpage, 'title', default=None) or self._og_search_title(webpage) - - content = self._parse_json( - self._search_regex( - r'data-player-options-content=(["\'])(?P<content>{.+?})\1', - webpage, 'content', default='{}', group='content'), - video_id, transform_source=unescapeHTML) - - ref_id = content.get('refId') or self._search_regex( - r'refId":"([^&]+)"', webpage, 'ref id') - - brightcove_id = self._search_regex( - r'''(?x) - java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s - [^>]* - java\.lang\.String\s+value\s*=\s*["'](\d+) - ''' % re.escape(ref_id), webpage, 'brightcove id') - - return { - '_type': 'url_transparent', - 'ie_key': BrightcoveNewIE.ie_key(), - 
'url': smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['CA']}), - 'id': brightcove_id, - 'description': content.get('description'), - 'creator': content.get('brand'), - 'age_limit': parse_age_limit(content.get('rating')), - 'series': content.get('showName') or content.get( - 'episodeName'), # this is intentional - 'season_number': int_or_none(content.get('seasonNumber')), - 'episode': title, - 'episode_number': int_or_none(content.get('episodeNumber')), - 'tags': content.get('tags', []), - } diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py deleted file mode 100644 index 523c442e6..000000000 --- a/yt_dlp/extractor/vrv.py +++ /dev/null @@ -1,269 +0,0 @@ -import base64 -import hashlib -import hmac -import json -import random -import string -import time -import urllib.parse - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - join_nonempty, - traverse_obj, -) - - -class VRVBaseIE(InfoExtractor): - _API_DOMAIN = None - _API_PARAMS = {} - _CMS_SIGNING = {} - _TOKEN = None - _TOKEN_SECRET = '' - - def _call_api(self, path, video_id, note, data=None): - # https://tools.ietf.org/html/rfc5849#section-3 - base_url = self._API_DOMAIN + '/core/' + path - query = [ - ('oauth_consumer_key', self._API_PARAMS['oAuthKey']), - ('oauth_nonce', ''.join(random.choices(string.ascii_letters, k=32))), - ('oauth_signature_method', 'HMAC-SHA1'), - ('oauth_timestamp', int(time.time())), - ] - if self._TOKEN: - query.append(('oauth_token', self._TOKEN)) - encoded_query = compat_urllib_parse_urlencode(query) - headers = self.geo_verification_headers() - if data: - data = json.dumps(data).encode() - headers['Content-Type'] = 'application/json' - base_string = '&'.join([ - 'POST' if data else 'GET', - urllib.parse.quote(base_url, ''), - urllib.parse.quote(encoded_query, '')]) - oauth_signature 
= base64.b64encode(hmac.new( - (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'), - base_string.encode(), hashlib.sha1).digest()).decode() - encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '') - try: - return self._download_json( - '?'.join([base_url, encoded_query]), video_id, - note='Downloading %s JSON metadata' % note, headers=headers, data=data) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - raise ExtractorError(json.loads(e.cause.response.read().decode())['message'], expected=True) - raise - - def _call_cms(self, path, video_id, note): - if not self._CMS_SIGNING: - index = self._call_api('index', video_id, 'CMS Signing') - self._CMS_SIGNING = index.get('cms_signing') or {} - if not self._CMS_SIGNING: - for signing_policy in index.get('signing_policies', []): - signing_path = signing_policy.get('path') - if signing_path and signing_path.startswith('/cms/'): - name, value = signing_policy.get('name'), signing_policy.get('value') - if name and value: - self._CMS_SIGNING[name] = value - return self._download_json( - self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, - note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) - - def _get_cms_resource(self, resource_key, video_id): - return self._call_api( - 'cms_resource', video_id, 'resource path', data={ - 'resource_key': resource_key, - })['__links__']['cms_resource']['href'] - - def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): - if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'): - return [] - format_id = join_nonempty( - stream_format, - audio_lang and 'audio-%s' % audio_lang, - hardsub_lang and 'hardsub-%s' % hardsub_lang) - if 'hls' in stream_format: - adaptive_formats = self._extract_m3u8_formats( - url, video_id, 'mp4', m3u8_id=format_id, - note='Downloading %s information' % format_id, - fatal=False) - elif 
stream_format == 'dash': - adaptive_formats = self._extract_mpd_formats( - url, video_id, mpd_id=format_id, - note='Downloading %s information' % format_id, - fatal=False) - if audio_lang: - for f in adaptive_formats: - if f.get('acodec') != 'none': - f['language'] = audio_lang - return adaptive_formats - - def _set_api_params(self): - webpage = self._download_webpage( - 'https://vrv.co/', None, headers=self.geo_verification_headers()) - self._API_PARAMS = self._parse_json(self._search_regex( - [ - r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)', - r'window\.__APP_CONFIG__\s*=\s*({.+})' - ], webpage, 'app config'), None)['cxApiParams'] - self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') - - -class VRVIE(VRVBaseIE): - IE_NAME = 'vrv' - _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)' - _TESTS = [{ - 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT', - 'info_dict': { - 'id': 'GR9PNZ396', - 'ext': 'mp4', - 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT', - 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f', - 'uploader_id': 'seeso', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # movie listing - 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT', - 'info_dict': { - 'id': 'G6NQXZ1J6', - 'title': 'Lily C.A.T', - 'description': 'md5:988b031e7809a6aeb60968be4af7db07', - }, - 'playlist_count': 2, - }] - _NETRC_MACHINE = 'vrv' - - def _perform_login(self, username, password): - token_credentials = self._call_api( - 'authenticate/by:credentials', None, 'Token Credentials', data={ - 'email': username, - 'password': password, - }) - self._TOKEN = token_credentials['oauth_token'] - self._TOKEN_SECRET = token_credentials['oauth_token_secret'] - - def _initialize_pre_login(self): - return self._set_api_params() - - def _real_extract(self, url): - video_id = self._match_id(url) - - object_data = self._call_cms(self._get_cms_resource( - 
'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0] - resource_path = object_data['__links__']['resource']['href'] - video_data = self._call_cms(resource_path, video_id, 'video') - title = video_data['title'] - description = video_data.get('description') - - if video_data.get('__class__') == 'movie_listing': - items = self._call_cms( - video_data['__links__']['movie_listing/movies']['href'], - video_id, 'movie listing').get('items') or [] - if len(items) != 1: - entries = [] - for item in items: - item_id = item.get('id') - if not item_id: - continue - entries.append(self.url_result( - 'https://vrv.co/watch/' + item_id, - self.ie_key(), item_id, item.get('title'))) - return self.playlist_result(entries, video_id, title, description) - video_data = items[0] - - streams_path = video_data['__links__'].get('streams', {}).get('href') - if not streams_path: - self.raise_login_required() - streams_json = self._call_cms(streams_path, video_id, 'streams') - - audio_locale = streams_json.get('audio_locale') - formats = [] - for stream_type, streams in streams_json.get('streams', {}).items(): - if stream_type in ('adaptive_hls', 'adaptive_dash'): - for stream in streams.values(): - formats.extend(self._extract_vrv_formats( - stream.get('url'), video_id, stream_type.split('_')[1], - audio_locale, stream.get('hardsub_locale'))) - - subtitles = {} - for k in ('captions', 'subtitles'): - for subtitle in streams_json.get(k, {}).values(): - subtitle_url = subtitle.get('url') - if not subtitle_url: - continue - subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({ - 'url': subtitle_url, - 'ext': subtitle.get('format', 'ass'), - }) - - thumbnails = [] - for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []: - thumbnail_url = thumbnail.get('source') - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), 
- }) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'subtitles': subtitles, - 'thumbnails': thumbnails, - 'description': description, - 'duration': float_or_none(video_data.get('duration_ms'), 1000), - 'uploader_id': video_data.get('channel_id'), - 'series': video_data.get('series_title'), - 'season': video_data.get('season_title'), - 'season_number': int_or_none(video_data.get('season_number')), - 'season_id': video_data.get('season_id'), - 'episode': title, - 'episode_number': int_or_none(video_data.get('episode_number')), - 'episode_id': video_data.get('production_episode_id'), - } - - -class VRVSeriesIE(VRVBaseIE): - IE_NAME = 'vrv:series' - _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)' - _TEST = { - 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider', - 'info_dict': { - 'id': 'G68VXG3G6', - }, - 'playlist_mincount': 11, - } - - def _initialize_pre_login(self): - return self._set_api_params() - - def _real_extract(self, url): - series_id = self._match_id(url) - - seasons_path = self._get_cms_resource( - 'cms:/seasons?series_id=' + series_id, series_id) - seasons_data = self._call_cms(seasons_path, series_id, 'seasons') - - entries = [] - for season in seasons_data.get('items', []): - episodes_path = season['__links__']['season/episodes']['href'] - episodes = self._call_cms(episodes_path, series_id, 'episodes') - for episode in episodes.get('items', []): - episode_id = episode['id'] - entries.append(self.url_result( - 'https://vrv.co/watch/' + episode_id, - 'VRV', episode_id, episode.get('title'))) - - return self.playlist_result(entries, series_id) diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py deleted file mode 100644 index 443ed43cc..000000000 --- a/yt_dlp/extractor/vshare.py +++ /dev/null @@ -1,57 +0,0 @@ -from .common import InfoExtractor -from ..utils import ExtractorError, decode_packed_codes - - -class VShareIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)' - _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)'] - _TESTS = [{ - 'url': 'https://vshare.io/d/0f64ce6', - 'md5': '17b39f55b5497ae8b59f5fbce8e35886', - 'info_dict': { - 'id': '0f64ce6', - 'title': 'vl14062007715967', - 'ext': 'mp4', - } - }, { - 'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1', - 'only_matching': True, - }] - - def _extract_packed(self, webpage): - packed = self._search_regex( - r'(eval\(function.+)', webpage, 'packed code') - unpacked = decode_packed_codes(packed) - digits = self._search_regex(r'\[([\d,]+)\]', unpacked, 'digits') - digits = [int(digit) for digit in digits.split(',')] - key_digit = self._search_regex( - r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') - chars = [chr(d - int(key_digit)) for d in digits] - return ''.join(chars) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, - video_id, headers={'Referer': url}) - - title = self._html_extract_title(webpage) - title = title.split(' - ')[0] - - error = self._html_search_regex( - r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage, - 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - - info = self._parse_html5_media_entries( - url, '<video>%s</video>' % self._extract_packed(webpage), - video_id)[0] - - info.update({ - 'id': video_id, - 'title': title, - }) - - return info diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py deleted file mode 100644 index 23ea70c77..000000000 --- a/yt_dlp/extractor/vupload.py +++ /dev/null @@ -1,52 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - parse_duration, - parse_filesize, - extract_attributes, - int_or_none, - js_to_json -) - - -class VuploadIE(InfoExtractor): - _VALID_URL = r'https://vupload\.com/v/(?P<id>[a-z0-9]+)' - _TESTS = [{ - 
'url': 'https://vupload.com/v/u28d0pl2tphy', - 'md5': '9b42a4a193cca64d80248e58527d83c8', - 'info_dict': { - 'id': 'u28d0pl2tphy', - 'ext': 'mp4', - 'description': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb', - 'title': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_extract_title(webpage) - video_json = self._parse_json(self._html_search_regex(r'sources:\s*(.+?]),', webpage, 'video'), video_id, transform_source=js_to_json) - formats = [] - for source in video_json: - if source['src'].endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(source['src'], video_id, m3u8_id='hls')) - duration = parse_duration(self._html_search_regex( - r'<i\s*class=["\']fad\s*fa-clock["\']></i>\s*([\d:]+)\s*</div>', webpage, 'duration', fatal=False)) - filesize_approx = parse_filesize(self._html_search_regex( - r'<i\s*class=["\']fad\s*fa-save["\']></i>\s*([^<]+)\s*</div>', webpage, 'filesize', fatal=False)) - extra_video_info = extract_attributes(self._html_search_regex( - r'(<video[^>]+>)', webpage, 'video_info', fatal=False)) - description = self._html_search_meta('description', webpage) - - return { - 'id': video_id, - 'formats': formats, - 'duration': duration, - 'filesize_approx': filesize_approx, - 'width': int_or_none(extra_video_info.get('width')), - 'height': int_or_none(extra_video_info.get('height')), - 'format_id': extra_video_info.get('height', '') + 'p', - 'title': title, - 'description': description, - } diff --git a/yt_dlp/extractor/vyborymos.py b/yt_dlp/extractor/vyborymos.py deleted file mode 100644 index 386518795..000000000 --- a/yt_dlp/extractor/vyborymos.py +++ /dev/null @@ -1,52 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str - - -class VyboryMosIE(InfoExtractor): - _VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)' - _TESTS = [{ - 'url': 
'http://vybory.mos.ru/#precinct/13636', - 'info_dict': { - 'id': '13636', - 'ext': 'mp4', - 'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'Россия, Москва, улица Введенского, 32А', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - } - }, { - 'url': 'http://vybory.mos.ru/account/channels?station_id=13636', - 'only_matching': True, - }] - - def _real_extract(self, url): - station_id = self._match_id(url) - - channels = self._download_json( - 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id, - station_id, 'Downloading channels JSON') - - formats = [] - for cam_num, (sid, hosts, name, _) in enumerate(channels, 1): - for num, host in enumerate(hosts, 1): - formats.append({ - 'url': 'http://%s/master.m3u8?sid=%s' % (host, sid), - 'ext': 'mp4', - 'format_id': 'camera%d-host%d' % (cam_num, num), - 'format_note': '%s, %s' % (name, host), - }) - - info = self._download_json( - 'http://vybory.mos.ru/json/voting_stations/%s/%s.json' - % (compat_str(station_id)[:3], station_id), - station_id, 'Downloading station JSON', fatal=False) or {} - - return { - 'id': station_id, - 'title': info.get('name') or station_id, - 'description': info.get('address'), - 'is_live': True, - 'formats': formats, - } diff --git a/yt_dlp/extractor/vzaar.py b/yt_dlp/extractor/vzaar.py deleted file mode 100644 index 19908a929..000000000 --- a/yt_dlp/extractor/vzaar.py +++ /dev/null @@ -1,100 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - float_or_none, - unified_timestamp, - url_or_none, -) - - -class VzaarIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)' - _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)'] - _TESTS = [{ - # HTTP and HLS - 'url': 'https://vzaar.com/videos/1152805', - 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', - 'info_dict': { 
- 'id': '1152805', - 'ext': 'mp4', - 'title': 'sample video (public)', - }, - }, { - 'url': 'https://view.vzaar.com/27272/player', - 'md5': '3b50012ac9bbce7f445550d54e0508f2', - 'info_dict': { - 'id': '27272', - 'ext': 'mp3', - 'title': 'MP3', - }, - }, { - # hlsAes = true - 'url': 'https://view.vzaar.com/11379930/player', - 'info_dict': { - 'id': '11379930', - 'ext': 'mp4', - 'title': 'Videoaula', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # with null videoTitle - 'url': 'https://view.vzaar.com/20313539/download', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._download_json( - 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) - - title = video_data.get('videoTitle') or video_id - - formats = [] - - source_url = url_or_none(video_data.get('sourceUrl')) - if source_url: - f = { - 'url': source_url, - 'format_id': 'http', - 'quality': 1, - } - if 'audio' in source_url: - f.update({ - 'vcodec': 'none', - 'ext': 'mp3', - }) - else: - f.update({ - 'width': int_or_none(video_data.get('width')), - 'height': int_or_none(video_data.get('height')), - 'ext': 'mp4', - 'fps': float_or_none(video_data.get('fps')), - }) - formats.append(f) - - video_guid = video_data.get('guid') - usp = video_data.get('usp') - if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict): - hls_aes = video_data.get('hlsAes') - qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items()) - url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' 
% ('aes' if hls_aes else '', video_guid, video_id) - m3u8_formats = self._extract_m3u8_formats( - url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - if hls_aes: - for f in m3u8_formats: - f['hls_aes'] = {'uri': url_templ % ('goose', '') + qs} - formats.extend(m3u8_formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': self._proto_relative_url(video_data.get('poster')), - 'duration': float_or_none(video_data.get('videoDuration')), - 'timestamp': unified_timestamp(video_data.get('ts')), - 'formats': formats, - } diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py deleted file mode 100644 index 155008f8c..000000000 --- a/yt_dlp/extractor/wakanim.py +++ /dev/null @@ -1,75 +0,0 @@ -from urllib.parse import unquote - -from .common import InfoExtractor -from ..utils import ( - merge_dicts, - urljoin, -) - - -class WakanimIE(InfoExtractor): - _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu', - 'info_dict': { - 'id': '2997', - 'ext': 'mp4', - 'title': 'Episode 02', - 'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d', - 'series': 'The Asterisk War (OmU.)', - 'season_number': 1, - 'episode': 'Episode 02', - 'episode_number': 2, - }, - 'params': { - 'skip_download': True, - }, - }, { - # DRM Protected - 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu', - 'only_matching': True, - }] - _GEO_BYPASS = False - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - if 'Geoblocking' in webpage: - if '/de/' in url: - self.raise_geo_restricted(countries=['DE', 'AT', 'CH']) - else: - self.raise_geo_restricted(countries=['RU']) - - manifest_url = urljoin(url, self._search_regex( - 
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'manifest url', - group='url')) - if not self.get_param('allow_unplayable_formats'): - # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls - encryption = self._search_regex( - r'encryption%3D(c(?:enc|bc(?:s-aapl)?))', - manifest_url, 'encryption', default=None) - if encryption in ('cenc', 'cbcs-aapl'): - self.report_drm(video_id) - - if 'format=mpd-time-cmaf' in unquote(manifest_url): - formats = self._extract_mpd_formats( - manifest_url, video_id, mpd_id='dash') - else: - formats = self._extract_m3u8_formats( - manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - - info = self._search_json_ld(webpage, video_id, default={}) - - title = self._search_regex( - (r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)'), - webpage, 'title', default=None, group='title') - - return merge_dicts(info, { - 'id': video_id, - 'title': title, - 'formats': formats, - }) diff --git a/yt_dlp/extractor/watchbox.py b/yt_dlp/extractor/watchbox.py deleted file mode 100644 index c973ca998..000000000 --- a/yt_dlp/extractor/watchbox.py +++ /dev/null @@ -1,153 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - js_to_json, - strip_or_none, - try_get, - unescapeHTML, - unified_timestamp, -) - - -class WatchBoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)' - _TESTS = [{ - # film - 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html', - 'info_dict': { - 'id': '341368', - 'ext': 'mp4', - 'title': 'Free Jimmy', - 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 4890, - 'age_limit': 16, - 'release_year': 2009, - }, - 'params': { - 'skip_download': 
True, - }, - 'expected_warnings': ['Failed to download m3u8 information'], - }, { - # episode - 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html', - 'info_dict': { - 'id': '328286', - 'ext': 'mp4', - 'title': 'S01 E01 - Date in der Hölle', - 'description': 'md5:2f31c74a8186899f33cb5114491dae2b', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1291, - 'age_limit': 12, - 'release_year': 2010, - 'series': 'Ugly Americans', - 'season_number': 1, - 'episode': 'Date in der Hölle', - 'episode_number': 1, - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Failed to download m3u8 information'], - }, { - 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - kind, video_id = mobj.group('kind', 'id') - - webpage = self._download_webpage(url, video_id) - - player_config = self._parse_json( - self._search_regex( - r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage, - 'player config', default='{}', group='data'), - video_id, transform_source=unescapeHTML, fatal=False) - - if not player_config: - player_config = self._parse_json( - self._search_regex( - r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config', - default='{}'), - video_id, transform_source=js_to_json, fatal=False) or {} - - source = player_config.get('source') or {} - - video_id = compat_str(source.get('videoId') or video_id) - - devapi = self._download_json( - 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={ - 'format': 'json', - 'apikey': 'hbbtv', - }, fatal=False) - - item = try_get(devapi, lambda x: x['items'][0], dict) or {} - - title = item.get('title') or try_get( - item, lambda x: x['movie']['headline_movie'], - compat_str) or source['title'] - - formats = [] - hls_url = item.get('media_videourl_hls') or source.get('hls') - if hls_url: - 
formats.extend(self._extract_m3u8_formats( - hls_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - dash_url = item.get('media_videourl_wv') or source.get('dash') - if dash_url: - formats.extend(self._extract_mpd_formats( - dash_url, video_id, mpd_id='dash', fatal=False)) - mp4_url = item.get('media_videourl') - if mp4_url: - formats.append({ - 'url': mp4_url, - 'format_id': 'mp4', - 'width': int_or_none(item.get('width')), - 'height': int_or_none(item.get('height')), - 'tbr': int_or_none(item.get('bitrate')), - }) - - description = strip_or_none(item.get('descr')) - thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail') - duration = int_or_none(item.get('media_length') or source.get('length')) - timestamp = unified_timestamp(item.get('pubDate')) - view_count = int_or_none(item.get('media_views')) - age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk'])) - release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year'])) - - info = { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'view_count': view_count, - 'age_limit': age_limit, - 'release_year': release_year, - 'formats': formats, - } - - if kind.lower() == 'serien': - series = try_get( - item, lambda x: x['special']['title'], - compat_str) or source.get('format') - season_number = int_or_none(self._search_regex( - r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number', - default=None) or self._search_regex( - r'/staffel-(\d+)/', url, 'season number', default=None)) - episode = source.get('title') - episode_number = int_or_none(self._search_regex( - r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number', - default=None)) - info.update({ - 'series': series, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - }) - - return info diff --git a/yt_dlp/extractor/watchindianporn.py 
b/yt_dlp/extractor/watchindianporn.py deleted file mode 100644 index 3ded2d1d4..000000000 --- a/yt_dlp/extractor/watchindianporn.py +++ /dev/null @@ -1,65 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import parse_duration - - -class WatchIndianPornIE(InfoExtractor): - IE_DESC = 'Watch Indian Porn' - _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' - _TEST = { - 'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html', - 'md5': '249589a164dde236ec65832bfce17440', - 'info_dict': { - 'id': 'RZa2avywNPa', - 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera', - 'ext': 'mp4', - 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 226, - 'view_count': int, - 'categories': list, - 'age_limit': 18, - } - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] - - title = self._html_search_regex(( - r'<title>(.+?)\s*-\s*Indian\s+Porn</title>', - r'<h4>(.+?)</h4>' - ), webpage, 'title') - - duration = parse_duration(self._search_regex( - r'Time:\s*<strong>\s*(.+?)\s*</strong>', - webpage, 'duration', fatal=False)) - - view_count = int(self._search_regex( - r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>', - webpage, 'view count', fatal=False)) - - categories = re.findall( - r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>', - webpage) - - info_dict.update({ - 'id': video_id, - 'display_id': display_id, - 'http_headers': { - 'Referer': url, - }, - 'title': title, - 'duration': duration, - 'view_count': view_count, - 'categories': categories, - 'age_limit': 
18, - }) - - return info_dict diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py deleted file mode 100644 index 0ec9c9d6e..000000000 --- a/yt_dlp/extractor/willow.py +++ /dev/null @@ -1,56 +0,0 @@ -from ..utils import ExtractorError -from .common import InfoExtractor - - -class WillowIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?willow\.tv/videos/(?P<id>[0-9a-z-_]+)' - _GEO_COUNTRIES = ['US'] - - _TESTS = [{ - 'url': 'http://willow.tv/videos/d5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', - 'info_dict': { - 'id': '169662', - 'display_id': 'd5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', - 'ext': 'mp4', - 'title': 'Winning Moment: 4th Test, England vs India', - 'thumbnail': 'https://aimages.willow.tv/ytThumbnails/6748_D5winning_moment.jpg', - 'duration': 233, - 'timestamp': 1630947954, - 'upload_date': '20210906', - 'location': 'Kennington Oval, London', - 'series': 'India tour of England 2021', - }, - 'params': { - 'skip_download': True, # AES-encrypted m3u8 - }, - }, { - 'url': 'http://willow.tv/videos/highlights-short-ind-vs-nz-streaming-online-2nd-t20i-new-zealand-tour-of-india-2021', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_data = self._parse_json(self._html_search_regex( - r'var\s+data_js\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, - 'data_js'), video_id) - - video = next((v for v in video_data.get('trending_videos') or [] - if v.get('secureurl')), None) - if not video: - raise ExtractorError('No videos found') - - formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4') - - return { - 'id': str(video.get('content_id')), - 'display_id': video.get('video_slug'), - 'title': video.get('video_name') or self._html_search_meta('twitter:title', webpage), - 'formats': formats, - 'thumbnail': video.get('yt_thumb_url') or self._html_search_meta( - 
'twitter:image', webpage, default=None), - 'duration': video.get('duration_seconds'), - 'timestamp': video.get('created_date'), - 'location': video.get('venue'), - 'series': video.get('series_name'), - } diff --git a/yt_dlp/extractor/xbef.py b/yt_dlp/extractor/xbef.py deleted file mode 100644 index ac69528a3..000000000 --- a/yt_dlp/extractor/xbef.py +++ /dev/null @@ -1,42 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote - - -class XBefIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking', - 'md5': 'a478b565baff61634a98f5e5338be995', - 'info_dict': { - 'id': '5119', - 'ext': 'mp4', - 'title': 'md5:7358a9faef8b7b57acda7c04816f170e', - 'age_limit': 18, - 'thumbnail': r're:^http://.*\.jpg', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex( - r'<h1[^>]*>(.*?)</h1>', webpage, 'title') - - config_url_enc = self._download_webpage( - 'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id, - note='Retrieving config URL') - config_url = compat_urllib_parse_unquote(config_url_enc) - config = self._download_xml( - config_url, video_id, note='Retrieving config') - - video_url = config.find('./file').text - thumbnail = config.find('./image').text - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'age_limit': 18, - } diff --git a/yt_dlp/extractor/xtube.py b/yt_dlp/extractor/xtube.py deleted file mode 100644 index db8292589..000000000 --- a/yt_dlp/extractor/xtube.py +++ /dev/null @@ -1,214 +0,0 @@ -import itertools -import re - -from .common import InfoExtractor -from ..networking import Request -from ..utils import ( - int_or_none, - js_to_json, - orderedSet, - parse_duration, - str_to_int, - url_or_none, -) - - -class 
XTubeIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - xtube:| - https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-) - ) - (?P<id>[^/?&#]+) - ''' - - _TESTS = [{ - # old URL schema - 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', - 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', - 'info_dict': { - 'id': 'kVTUy_G222_', - 'ext': 'mp4', - 'title': 'strange erotica', - 'description': 'contains:an ET kind of thing', - 'uploader': 'greenshowers', - 'duration': 450, - 'view_count': int, - 'comment_count': int, - 'age_limit': 18, - } - }, { - # new URL schema - 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837', - 'only_matching': True, - }, { - 'url': 'xtube:625837', - 'only_matching': True, - }, { - 'url': 'xtube:kVTUy_G222_', - 'only_matching': True, - }, { - 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - if not display_id: - display_id = video_id - - if video_id.isdigit() and len(video_id) < 11: - url_pattern = 'http://www.xtube.com/video-watch/-%s' - else: - url_pattern = 'http://www.xtube.com/watch.php?v=%s' - - webpage = self._download_webpage( - url_pattern % video_id, display_id, headers={ - 'Cookie': 'age_verified=1; cookiesAccepted=1', - }) - - title, thumbnail, duration, sources, media_definition = [None] * 5 - - config = self._parse_json(self._search_regex( - r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config', - default='{}'), video_id, transform_source=js_to_json, fatal=False) - if config: - config = config.get('mainRoll') - if isinstance(config, dict): - title = config.get('title') - thumbnail = config.get('poster') - duration = int_or_none(config.get('duration')) - sources = config.get('sources') or 
config.get('format') - media_definition = config.get('mediaDefinition') - - if not isinstance(sources, dict) and not media_definition: - sources = self._parse_json(self._search_regex( - r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', - webpage, 'sources', group='sources'), video_id, - transform_source=js_to_json) - - formats = [] - format_urls = set() - - if isinstance(sources, dict): - for format_id, format_url in sources.items(): - format_url = url_or_none(format_url) - if not format_url: - continue - if format_url in format_urls: - continue - format_urls.add(format_url) - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'height': int_or_none(format_id), - }) - - if isinstance(media_definition, list): - for media in media_definition: - video_url = url_or_none(media.get('videoUrl')) - if not video_url: - continue - if video_url in format_urls: - continue - format_urls.add(video_url) - format_id = media.get('format') - if format_id == 'hls': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif format_id == 'mp4': - height = int_or_none(media.get('quality')) - formats.append({ - 'url': video_url, - 'format_id': '%s-%d' % (format_id, height) if height else format_id, - 'height': height, - }) - - self._remove_duplicate_formats(formats) - - if not title: - title = self._search_regex( - (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), - webpage, 'title', group='title') - description = self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'twitter:description', webpage, default=None) or self._search_regex( - r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) - uploader = self._search_regex( - (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"', - r'<span[^>]+class="nickname"[^>]*>([^<]+)'), - webpage, 'uploader', fatal=False) - if not duration: - duration = 
parse_duration(self._search_regex( - r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>', - webpage, 'duration', fatal=False)) - view_count = str_to_int(self._search_regex( - (r'["\']viewsCount["\'][^>]*>(\d+)\s+views', - r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'), - webpage, 'view count', fatal=False)) - comment_count = str_to_int(self._html_search_regex( - r'>Comments? \(([\d,\.]+)\)<', - webpage, 'comment count', fatal=False)) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, - 'age_limit': 18, - 'formats': formats, - } - - -class XTubeUserIE(InfoExtractor): - IE_DESC = 'XTube user profile' - _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)' - _TEST = { - 'url': 'http://www.xtube.com/profile/greenshowers-4056496', - 'info_dict': { - 'id': 'greenshowers-4056496', - 'age_limit': 18, - }, - 'playlist_mincount': 154, - } - - def _real_extract(self, url): - user_id = self._match_id(url) - - entries = [] - for pagenum in itertools.count(1): - request = Request( - 'http://www.xtube.com/profile/%s/videos/%d' % (user_id, pagenum), - headers={ - 'Cookie': 'popunder=4', - 'X-Requested-With': 'XMLHttpRequest', - 'Referer': url, - }) - - page = self._download_json( - request, user_id, 'Downloading videos JSON page %d' % pagenum) - - html = page.get('html') - if not html: - break - - for video_id in orderedSet([video_id for _, video_id in re.findall( - r'data-plid=(["\'])(.+?)\1', html)]): - entries.append(self.url_result('xtube:%s' % video_id, XTubeIE.ie_key())) - - page_count = int_or_none(page.get('pageCount')) - if not page_count or pagenum == page_count: - break - - playlist = self.playlist_result(entries, user_id) - playlist['age_limit'] = 18 - return playlist diff --git a/yt_dlp/extractor/xuite.py b/yt_dlp/extractor/xuite.py deleted file mode 100644 index 
71ddadd42..000000000 --- a/yt_dlp/extractor/xuite.py +++ /dev/null @@ -1,149 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - get_element_by_attribute, - parse_iso8601, - remove_end, -) - - -class XuiteIE(InfoExtractor): - IE_DESC = '隨意窩Xuite影音' - _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?' - _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64 - _TESTS = [{ - # Audio - 'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2', - 'md5': 'e79284c87b371424885448d11f6398c8', - 'info_dict': { - 'id': '3860914', - 'ext': 'mp3', - 'title': '孤單南半球-歐德陽', - 'description': '孤單南半球-歐德陽', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 247.246, - 'timestamp': 1314932940, - 'upload_date': '20110902', - 'uploader': '阿能', - 'uploader_id': '15973816', - 'categories': ['個人短片'], - }, - }, { - # Video with only one format - 'url': 'http://vlog.xuite.net/play/WUxxR2xCLTI1OTI1MDk5LmZsdg==', - 'md5': '21f7b39c009b5a4615b4463df6eb7a46', - 'info_dict': { - 'id': '25925099', - 'ext': 'mp4', - 'title': 'BigBuckBunny_320x180', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 596.458, - 'timestamp': 1454242500, - 'upload_date': '20160131', - 'uploader': '屁姥', - 'uploader_id': '12158353', - 'categories': ['個人短片'], - 'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4', - }, - }, { - # Video with two formats - 'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==', - 'md5': '1166e0f461efe55b62e26a2d2a68e6de', - 'info_dict': { - 'id': '21301170', - 'ext': 'mp4', - 'title': '暗殺教室 02', - 'description': '字幕:【極影字幕社】', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1384.907, - 'timestamp': 1421481240, - 'upload_date': '20150117', - 'uploader': '我只是想認真點', - 'uploader_id': '242127761', - 'categories': ['電玩動漫'], - }, - 'skip': 'Video removed', - }, { - # Video with encoded media id - # from 
http://forgetfulbc.blogspot.com/2016/06/date.html - 'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0', - 'info_dict': { - 'id': '27447336', - 'ext': 'mp4', - 'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)', - 'description': 'md5:1223810fa123b179083a3aed53574706', - 'timestamp': 1466160960, - 'upload_date': '20160617', - 'uploader': 'B.C. & Lowy', - 'uploader_id': '232279340', - }, - }, { - 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', - 'only_matching': True, - }] - - def _real_extract(self, url): - # /play/ URLs provide embedded video URL and more metadata - url = url.replace('/embed/', '/play/') - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - error_msg = self._search_regex( - r'<div id="error-message-content">([^<]+)', - webpage, 'error message', default=None) - if error_msg: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_msg), - expected=True) - - media_info = self._parse_json(self._search_regex( - r'var\s+mediaInfo\s*=\s*({.*});', webpage, 'media info'), video_id) - - video_id = media_info['MEDIA_ID'] - - formats = [] - for key in ('html5Url', 'html5HQUrl'): - video_url = media_info.get(key) - if not video_url: - continue - format_id = self._search_regex( - r'\bq=(.+?)\b', video_url, 'format id', default=None) - formats.append({ - 'url': video_url, - 'ext': 'mp4' if format_id.isnumeric() else format_id, - 'format_id': format_id, - 'height': int(format_id) if format_id.isnumeric() else None, - }) - - timestamp = media_info.get('PUBLISH_DATETIME') - if timestamp: - timestamp = parse_iso8601(timestamp + ' +0800', ' ') - - category = media_info.get('catName') - categories = [category] if category else [] - - uploader = media_info.get('NICKNAME') - uploader_url = None - - author_div = get_element_by_attribute('itemprop', 'author', webpage) - if author_div: - uploader = uploader or 
self._html_search_meta('name', author_div) - uploader_url = self._html_search_regex( - r'<link[^>]+itemprop="url"[^>]+href="([^"]+)"', author_div, - 'uploader URL', fatal=False) - - return { - 'id': video_id, - 'title': media_info['TITLE'], - 'description': remove_end(media_info.get('metaDesc'), ' (Xuite 影音)'), - 'thumbnail': media_info.get('ogImageUrl'), - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': media_info.get('MEMBER_ID'), - 'uploader_url': uploader_url, - 'duration': float_or_none(media_info.get('MEDIA_DURATION'), 1000000), - 'categories': categories, - 'formats': formats, - } diff --git a/yt_dlp/extractor/yesjapan.py b/yt_dlp/extractor/yesjapan.py deleted file mode 100644 index 94e41660d..000000000 --- a/yt_dlp/extractor/yesjapan.py +++ /dev/null @@ -1,56 +0,0 @@ -from .common import InfoExtractor -from ..networking import HEADRequest -from ..utils import get_element_by_attribute, parse_iso8601 - - -class YesJapanIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html' - _TEST = { - 'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html', - 'md5': 'f0be416314e5be21a12b499b330c21cf', - 'info_dict': { - 'id': '726497834', - 'title': 'Japanese in 5! 
#20 - WA And GA Particle Usages', - 'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....', - 'ext': 'mp4', - 'timestamp': 1416391590, - 'upload_date': '20141119', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - video_url = self._og_search_video_url(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - - timestamp = None - submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage) - if submit_info: - timestamp = parse_iso8601(self._search_regex( - r'datetime="([^"]+)"', submit_info, 'upload date', fatal=False, default=None)) - - # attempt to resolve the final URL in order to get a proper extension - redirect_req = HEADRequest(video_url) - req = self._request_webpage( - redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False) - if req: - video_url = req.url - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - }] - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'timestamp': timestamp, - 'thumbnail': thumbnail, - } diff --git a/yt_dlp/extractor/yinyuetai.py b/yt_dlp/extractor/yinyuetai.py deleted file mode 100644 index b2e3172f9..000000000 --- a/yt_dlp/extractor/yinyuetai.py +++ /dev/null @@ -1,52 +0,0 @@ -from .common import InfoExtractor -from ..utils import ExtractorError - - -class YinYueTaiIE(InfoExtractor): - IE_NAME = 'yinyuetai:video' - IE_DESC = '音悦Tai' - _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://v.yinyuetai.com/video/2322376', - 'md5': '6e3abe28d38e3a54b591f9f040595ce0', - 'info_dict': { - 'id': '2322376', - 'ext': 'mp4', - 'title': '少女时代_PARTY_Music Video Teaser', - 'creator': '少女时代', - 'duration': 25, - 
'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'http://v.yinyuetai.com/video/h5/2322376', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - info = self._download_json( - 'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id, - 'Downloading mv info')['videoInfo']['coreVideoInfo'] - - if info['error']: - raise ExtractorError(info['errorMsg'], expected=True) - - formats = [{ - 'url': format_info['videoUrl'], - 'format_id': format_info['qualityLevel'], - 'format': format_info.get('qualityLevelName'), - 'filesize': format_info.get('fileSize'), - # though URLs ends with .flv, the downloaded files are in fact mp4 - 'ext': 'mp4', - 'tbr': format_info.get('bitrate'), - } for format_info in info['videoUrlModels']] - - return { - 'id': video_id, - 'title': info['videoName'], - 'thumbnail': info.get('bigHeadImage'), - 'creator': info.get('artistNames'), - 'duration': info.get('duration'), - 'formats': formats, - } diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py deleted file mode 100644 index a7d7371f3..000000000 --- a/yt_dlp/extractor/ynet.py +++ /dev/null @@ -1,48 +0,0 @@ -import json -import re -import urllib.parse - -from .common import InfoExtractor - - -class YnetIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' - _TESTS = [ - { - 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', - 'info_dict': { - 'id': 'L-11659-99244', - 'ext': 'flv', - 'title': 'איש לא יודע מאיפה באנו', - 'thumbnail': r're:^https?://.*\.jpg', - } - }, { - 'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', - 'info_dict': { - 'id': 'L-8859-84418', - 'ext': 'flv', - 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין", - 'thumbnail': r're:^https?://.*\.jpg', - } - } - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - content 
= urllib.parse.unquote_plus(self._og_search_video_url(webpage)) - config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config')) - f4m_url = config['clip']['url'] - title = self._og_search_title(webpage) - m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title) - if m: - title = m.group('title') - formats = self._extract_f4m_formats(f4m_url, video_id) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': self._og_search_thumbnail(webpage), - }