From 5867a1678924ad25a4784abfa5dbd28b5b69eb67 Mon Sep 17 00:00:00 2001 From: nixxo Date: Mon, 9 Nov 2020 10:59:25 +0100 Subject: [PATCH] [rcs] fixed embeds detection, fixed tests --- youtube_dlc/extractor/extractors.py | 3 +- youtube_dlc/extractor/rcs.py | 59 ++++++++++++++--------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index c3b76f0392..ecbe68ab08 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -937,8 +937,7 @@ ) from .rbmaradio import RBMARadioIE from .rcs import ( - CorriereIE, - GazzettaIE, + RCSIE, RCSEmbedsIE, RCSVariousIE, ) diff --git a/youtube_dlc/extractor/rcs.py b/youtube_dlc/extractor/rcs.py index 8dbd9913b9..830182c6d5 100644 --- a/youtube_dlc/extractor/rcs.py +++ b/youtube_dlc/extractor/rcs.py @@ -14,7 +14,7 @@ ) -class RCSIE(InfoExtractor): +class RCSBaseIE(InfoExtractor): _ALL_REPLACE = { 'media2vam.corriere.it.edgesuite.net': 'media2vam-corriere-it.akamaized.net', @@ -237,7 +237,11 @@ def _real_extract(self, url): # if no video data found try search for iframes emb = RCSEmbedsIE._extract_url(page) if emb: - return self._real_extract(emb) + return { + '_type': 'url_transparent', + 'url': emb, + 'ie_key': RCSEmbedsIE.ie_key() + } if not video_data: raise ExtractorError('Video data not found in the page') @@ -247,7 +251,7 @@ def _real_extract(self, url): description = (video_data.get('description') or clean_html(video_data.get('htmlDescription'))) - uploader = video_data.get('provider') or mobj.gruop('cdn') + uploader = video_data.get('provider') or mobj.group('cdn') return { 'id': video_id, @@ -258,8 +262,7 @@ def _real_extract(self, url): } -class RCSEmbedsIE(RCSIE): - IE_NAME = 'rcs:rcs' +class RCSEmbedsIE(RCSBaseIE): _VALID_URL = r'''(?x) https?://(?Pvideo)\. 
(?P @@ -279,6 +282,16 @@ class RCSEmbedsIE(RCSIE): 'description': 'md5:65b09633df9ffee57f48b39e34c9e067', 'uploader': 'rcs.it', } + }, { + 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', + 'md5': 'a043e3fecbe4d9ed7fc5d888652a5440', + 'info_dict': { + 'id': 'gazzanet-mo05-0000260789', + 'ext': 'mp4', + 'title': 'Valentino Rossi e papà Graziano si divertono col drifting', + 'description': 'md5:a8bf90d6adafd9815f70fc74c0fc370a', + 'uploader': 'rcd', + } }, { 'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player', 'match_only': True @@ -324,17 +337,17 @@ def _extract_url(webpage): return urls[0] if urls else None -class CorriereIE(RCSIE): - IE_NAME = 'rcs:corriere' +class RCSIE(RCSBaseIE): _VALID_URL = r'''(?x)https?://(?Pvideo|viaggi)\. (?P (?: - corrieredelmezzogiorno\.| - corrieredelveneto\.| - corrieredibologna\.| - corrierefiorentino\. - )? - corriere\.it)/.+?/(?P[^/]+)(?=\?|/$|$)''' + corrieredelmezzogiorno\. + |corrieredelveneto\. + |corrieredibologna\. + |corrierefiorentino\. 
+ )?corriere\.it + |(?:gazzanet\.)?gazzetta\.it) + /(?!video-embed/).+?/(?P[^/\?]+)(?=\?|/$|$)''' _TESTS = [{ 'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb', 'md5': '0f4ededc202b0f00b6e509d831e2dcda', @@ -356,18 +369,6 @@ class CorriereIE(RCSIE): 'uploader': 'DOVE Viaggi', } }, { - 'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player', - 'match_only': True - }, { - 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', - 'match_only': True - }] - - -class GazzettaIE(RCSIE): - IE_NAME = 'rcs:gazzetta' - _VALID_URL = r'https?://(?Pvideo)\.(?P(?:gazzanet\.)?gazzetta\.it)/.+?/(?P[^/]+?)(?:$|\?)' - _TESTS = [{ 'url': 'https://video.gazzetta.it/video-motogp-catalogna-cadute-dovizioso-vale-rossi/49612410-00ca-11eb-bcd8-30d4253e0140?vclk=Videobar', 'md5': 'eedc1b5defd18e67383afef51ff7bdf9', 'info_dict': { @@ -378,16 +379,12 @@ class GazzettaIE(RCSIE): 'uploader': 'AMorici', } }, { - 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', - 'match_only': True - }, { - 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', + 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', 'match_only': True }] -class RCSVariousIE(RCSIE): - IE_NAME = 'rcs:various' +class RCSVariousIE(RCSBaseIE): _VALID_URL = r'''(?x)https?://www\. (?P leitv\.it|