From 3938a9212c3d1aa30a7f6db12b997d94afd8b646 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 27 Nov 2021 12:01:42 +0530 Subject: [PATCH] [CPTwentyFour] Add extractor (#1769) Closes #1768 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 5 ++++- yt_dlp/extractor/ninecninemedia.py | 35 +++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ed8a23e723..a277bf7226 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -943,7 +943,10 @@ NicovideoSearchIE, NicovideoSearchURLIE, ) -from .ninecninemedia import NineCNineMediaIE +from .ninecninemedia import ( + NineCNineMediaIE, + CPTwentyFourIE, +) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 4aaf21a120..781842721b 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, @@ -99,3 +98,37 @@ def _real_extract(self, url): } return info + + +class CPTwentyFourIE(InfoExtractor): + IE_NAME = 'cp24' + _GEO_COUNTRIES = ['CA'] + _VALID_URL = r'https?://(?:www\.)?cp24\.com/news/(?P[^?#]+)' + + _TESTS = [{ + 'url': 'https://www.cp24.com/news/video-shows-atm-being-ripped-out-of-business-by-pickup-truck-driver-in-mississauga-1.5676877', + 'info_dict': { + 'id': '2328005', + 'ext': 'mp4', + 'title': 'WATCH: Truck rips ATM from Mississauga business', + 'description': 'md5:cf7498480885f080a754389a2b2f7073', + 'timestamp': 1637618377, + 'episode_number': None, + 'season': 'Season 0', + 'season_number': 0, + 'season_id': 57974, + 'series': 'CTV News Toronto', + 'duration': 26.86, + 'thumbnail': 'http://images2.9c9media.com/image_asset/2014_11_5_2eb609a0-475b-0132-fbd6-34b52f6f1279_jpg_2000x1125.jpg', + 'upload_date': '20211122', + }, + 'params': {'skip_download': True, 'format': 'bv'} + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + id, destination = self._search_regex( + r'getAuthStates\("(?P[^"]+)",\s?"(?P[^"]+)"\);', + webpage, 'video id and destination', group=('id', 'destination')) + return self.url_result(f'9c9media:{destination}:{id}', ie=NineCNineMediaIE.ie_key(), video_id=id)