diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 967010826..a06620bb4 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1659,10 +1659,8 @@ RaiIE, RaiNewsIE, RaiPlayIE, - RaiPlayLiveIE, RaiPlayPlaylistIE, RaiPlaySoundIE, - RaiPlaySoundLiveIE, RaiPlaySoundPlaylistIE, RaiSudtirolIE, ) diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py index 20a5235f2..6ae31afb5 100644 --- a/yt_dlp/extractor/la7.py +++ b/yt_dlp/extractor/la7.py @@ -7,10 +7,11 @@ class LA7IE(InfoExtractor): IE_NAME = 'la7.it' - _VALID_URL = r'''(?x)https?://(?: - (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video|news)/| - tg\.la7\.it/repliche-tgla7\?id= - )(?P<id>.+)''' + _VALID_URL = [ + r'https?://(?:www\.)?la7\.it/[^/]+/(?:rivedila7|video|news)/.+-(?P<id>\d{5,})', + r'https?://tg\.la7\.it/repliche-tgla7\?id=(?P<id>\d{5,})', + r'https?://tg\.la7\.it(?:/[^/]+)+-(?P<id>\d{5,})' + ] _TESTS = [{ # single quality video @@ -39,7 +40,7 @@ class LA7IE(InfoExtractor): 'formats': 'count:8', }, }, { - 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077', + 'url': 'https://tg.la7.it/repliche-tgla7?id=464601', 'only_matching': True, }] _HOST = 'https://awsvodpkg.iltrovatore.it' diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index efb47affc..fb5993522 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -10,6 +10,7 @@ filter_dict, int_or_none, join_nonempty, + make_archive_id, parse_duration, remove_start, strip_or_none, @@ -250,7 +251,10 @@ def _extract_subtitles(url, video_data): class RaiPlayIE(RaiBaseIE): - _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' + _VALID_URL = [ + rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)', + r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))', + ] _TESTS = [{ 'url': 'https://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'md5': 
'8970abf8caf8aef4696e7b1f2adfc696', @@ -321,6 +325,25 @@ class RaiPlayIE(RaiBaseIE): 'timestamp': 1348495020, 'upload_date': '20120924', }, + }, { + # live stream + 'url': 'https://www.raiplay.it/dirette/rainews24', + 'info_dict': { + 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', + 'display_id': 'rainews24', + 'ext': 'mp4', + 'title': r're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', + 'uploader': 'Rai News 24', + 'creator': 'Rai News 24', + 'is_live': True, + 'live_status': 'is_live', + 'upload_date': '20090502', + 'timestamp': 1241276220, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'only_matching': True, @@ -354,6 +377,7 @@ def _real_extract(self, url): return { 'id': remove_start(media.get('id'), 'ContentItem-') or video_id, 'display_id': video_id, + '_old_archive_ids': [make_archive_id('RaiPlayLive', video_id)] if not re.match(RaiBaseIE._UUID_RE, video_id) else None, 'title': media.get('name'), 'alt_title': strip_or_none(alt_title or None), 'description': media.get('description'), @@ -377,28 +401,6 @@ def _real_extract(self, url): } -class RaiPlayLiveIE(RaiPlayIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))' - _TESTS = [{ - 'url': 'http://www.raiplay.it/dirette/rainews24', - 'info_dict': { - 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', - 'display_id': 'rainews24', - 'ext': 'mp4', - 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', - 'uploader': 'Rai News 24', - 'creator': 'Rai News 24', - 'is_live': True, - 'live_status': 'is_live', - 'upload_date': '20090502', - 'timestamp': 1241276220, - 'formats': 'count:3', - }, - 'params': {'skip_download': True}, - }] - - class RaiPlayPlaylistIE(InfoExtractor): 
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' _TESTS = [{ @@ -463,7 +465,10 @@ def _real_extract(self, url): class RaiPlaySoundIE(RaiBaseIE): - _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' + _VALID_URL = [ + rf'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)', + r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)', + ] _TESTS = [{ 'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', @@ -482,7 +487,26 @@ class RaiPlaySoundIE(RaiBaseIE): 'timestamp': 1638346620, 'upload_date': '20211201', }, - 'params': {'skip_download': True}, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.raiplaysound.it/radio2', + 'info_dict': { + 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44', + 'display_id': 'radio2', + 'ext': 'mp4', + 'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+', + 'thumbnail': r're:https://www\.raiplaysound\.it/dl/img/.+png', + 'uploader': 'rai radio 2', + 'series': 'Rai Radio 2', + 'creator': 'raiplaysound', + 'is_live': True, + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'live', + }, }] def _real_extract(self, url): @@ -506,6 +530,7 @@ def _real_extract(self, url): **info, 'id': uid or audio_id, 'display_id': audio_id, + '_old_archive_ids': [make_archive_id('RaiPlaySoundLive', audio_id)] if not re.match(RaiBaseIE._UUID_RE, audio_id) else None, 'title': traverse_obj(media, 'title', 'episode_title'), 'alt_title': traverse_obj(media, ('track_info', 'media_name'), expected_type=strip_or_none), 'description': media.get('description'), @@ -521,26 +546,6 @@ def _real_extract(self, url): } -class RaiPlaySoundLiveIE(RaiPlaySoundIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)' - _TESTS = [{ - 'url': 
'https://www.raiplaysound.it/radio2', - 'info_dict': { - 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44', - 'display_id': 'radio2', - 'ext': 'mp4', - 'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+', - 'thumbnail': r're:^https://www\.raiplaysound\.it/dl/img/.+\.png', - 'uploader': 'rai radio 2', - 'series': 'Rai Radio 2', - 'creator': 'raiplaysound', - 'is_live': True, - 'live_status': 'is_live', - }, - 'params': {'skip_download': True}, - }] - - class RaiPlaySoundPlaylistIE(InfoExtractor): _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' _TESTS = [{