diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 7c5d35f473..07b1b18611 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -4,7 +4,7 @@ class AmazonStoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P[^/&#$?]+)' + _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P[^/&#$?]+)' _TESTS = [{ 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 7287677c11..51d30a3213 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -11,7 +11,7 @@ class CanalAlphaIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P\d+)/?.*' + _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P\d+)/?.*' _TESTS = [{ 'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021', diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index 6bdc4f6bbb..e6841fb8b2 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -67,7 +67,7 @@ def _get_post(self, id, post_data): class ChingariIE(ChingariBaseIE): - _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P[^&/#?]+)' + _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P[^&/#?]+)' _TESTS = [{ 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb', 'info_dict': { @@ -102,7 +102,7 @@ def _real_extract(self, url): class ChingariUserIE(ChingariBaseIE): - _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P[^/?]+)' _TESTS = [{ 'url': 'https://chingari.io/dada1023', 'playlist_mincount': 3, diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py index 868d8d27da..d49f1ca744 100644 --- a/yt_dlp/extractor/cozytv.py +++ b/yt_dlp/extractor/cozytv.py @@ -6,7 +6,7 @@ class CozyTVIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P[^/]+)/replays/(?P[^/$#&?]+)' + _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P[^/]+)/replays/(?P[^/$#&?]+)' _TESTS = [{ 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1', diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py index b4e544d4f6..cd19325bc7 100644 --- a/yt_dlp/extractor/epicon.py +++ b/yt_dlp/extractor/epicon.py @@ -8,7 +8,7 @@ class EpiconIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P[^/?#]+)' _TESTS = [{ 'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar', 'info_dict': { @@ -84,7 +84,7 @@ def _real_extract(self, url): class EpiconSeriesIE(InfoExtractor): - _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P[^/?#]+)' + _VALID_URL = r'(?!.*season)https?://(?:www\.)?epicon\.in/tv-shows/(?P[^/?#]+)' _TESTS = [{ 'url': 'https://www.epicon.in/tv-shows/1-of-something', 'playlist_mincount': 5, diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 3980c2349f..2759e7436f 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -10,7 +10,7 @@ class EUScreenIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P[^&?$/]+)' + _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item.html\?id=(?P[^&?$/]+)' _TESTS = [{ 'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C', diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index bde6e86248..9ba0b1ca19 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -15,7 +15,7 @@ class GabTVIE(InfoExtractor): - _VALID_URL = r'(?:https?://)tv.gab.com/channel/[^/]+/view/(?P[a-z0-9-]+)' + _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P[a-z0-9-]+)' _TESTS = [{ 'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488', 'info_dict': { diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index a7792a5e0e..58cd595113 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -6,7 +6,7 @@ class GronkhIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/stream/(?P\d+)' _TESTS = [{ 'url': 'https://gronkh.tv/stream/536', diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 0bdf772a19..de2b30cf7c 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -296,7 +296,7 @@ def _real_extract(self, url): class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' - _VALID_URL = r'(?P(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P\d+))' + _VALID_URL = r'(?Phttps?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P\d+))' _TESTS = [{ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646', 'info_dict': { diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py index 1706b28a0d..088db1cb01 100644 --- a/yt_dlp/extractor/koo.py +++ b/yt_dlp/extractor/koo.py @@ -8,7 +8,7 @@ class KooIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P[^/&#$?]+)' + _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P[^/&#$?]+)' _TESTS = [{ # Test for video in the comments 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde', 'info_dict': { diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py index 0f0b09e2c4..1d6d4b8040 100644 --- a/yt_dlp/extractor/mlssoccer.py +++ b/yt_dlp/extractor/mlssoccer.py @@ -6,7 +6,7 @@ class MLSSoccerIE(InfoExtractor): _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)' - _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P[^/&$#?]+)' % _VALID_DOMAINS + _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P[^/&$#?]+)' % _VALID_DOMAINS _TESTS = [{ 'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986', diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py index dcd26388a6..09fadf8d90 100644 --- a/yt_dlp/extractor/musescore.py +++ b/yt_dlp/extractor/musescore.py @@ -5,7 +5,7 @@ class MuseScoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P[^#&?]+)' + _VALID_URL = r'https?://(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P[^#&?]+)' _TESTS = [{ 'url': 'https://musescore.com/user/73797/scores/142975', 'info_dict': { @@ -13,7 +13,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'WA Mozart Marche Turque (Turkish March fingered)', 'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'PapyPiano', 'creator': 'Wolfgang Amadeus Mozart', } @@ -24,7 +24,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child', 'description': 'md5:4dca71191c14abc312a0a4192492eace', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'roxbelviolin', 'creator': 'Guns N´Roses Arr. Roxbel Violin', } @@ -35,7 +35,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'Für Elise – Beethoven', 'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'ClassicMan', 'creator': 'Ludwig van Beethoven (1770–1827)', } diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index 5874556e34..3c2afd838d 100644 --- a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -180,7 +180,7 @@ def _real_extract(self, url): class MxplayerShowIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?mxplayer\.in/show/(?P[-\w]+)-(?P\w+)/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/show/(?P[-\w]+)-(?P\w+)/?(?:$|[#?])' _TESTS = [{ 'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417', 'playlist_mincount': 440, diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py index 79501003db..826faadd2e 100644 --- a/yt_dlp/extractor/onefootball.py +++ b/yt_dlp/extractor/onefootball.py @@ -5,7 +5,7 @@ class OneFootballIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P\d+)' _TESTS = [{ 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index d1d9911f7d..07ac15b540 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -9,7 +9,7 @@ class PlanetMarathiIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P[^/#&?$]+)' + _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P[^/#&?$]+)' _TESTS = [{ 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', 'playlist_mincount': 2, diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py index 1d832a6796..9e9867ba5d 100644 --- a/yt_dlp/extractor/projectveritas.py +++ b/yt_dlp/extractor/projectveritas.py @@ -10,7 +10,7 @@ class ProjectVeritasIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?Pnews|video)/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?Pnews|video)/(?P[^/?#]+)' _TESTS = [{ 'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/', 'info_dict': { diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index 142d5dc3a0..00a5b00cdd 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -16,7 +16,7 @@ class ShemarooMeIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P[^?#]+)' _TESTS = [{ 'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara', 'info_dict': { @@ -78,7 +78,7 @@ def _real_extract(self, url): iv = [0] * 16 m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv)) m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii') - formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']}) + formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']}) self._sort_formats(formats) release_date = self._html_search_regex( @@ -91,6 +91,7 @@ def _real_extract(self, url): subtitles.setdefault('EN', []).append({ 'url': self._proto_relative_url(sub_url), }) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) description = self._html_search_regex(r'(?s)>Synopsis([a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P[a-z0-9]+)' _TESTS = [{ 'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71', diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py index 60e84529d8..fe6a9554a9 100644 --- a/yt_dlp/extractor/threespeak.py +++ b/yt_dlp/extractor/threespeak.py @@ -11,7 +11,7 @@ class ThreeSpeakIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P[^/$&#?]+)' + _VALID_URL = r'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P[^/$&#?]+)' _TESTS = [{ 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy', @@ -75,7 +75,7 @@ def _real_extract(self, url): class ThreeSpeakUserIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P[^/$&?#]+)' + _VALID_URL = r'https?://(?:www\.)?3speak\.tv/user/(?P[^/$&?#]+)' _TESTS = [{ 'url': 'https://3speak.tv/user/theycallmedan', diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py index 4a25f0c55c..4986635f24 100644 --- a/yt_dlp/extractor/utreon.py +++ b/yt_dlp/extractor/utreon.py @@ -13,7 +13,7 @@ class UtreonIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?utreon.com/v/(?P[a-zA-Z0-9_-]+)' + _VALID_URL = r'https?://(?:www\.)?utreon.com/v/(?P[a-zA-Z0-9_-]+)' _TESTS = [{ 'url': 'https://utreon.com/v/z_I7ikQbuDw', 'info_dict': { diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py index e2944ec635..a9b66b95c2 100644 --- a/yt_dlp/extractor/voot.py +++ b/yt_dlp/extractor/voot.py @@ -15,7 +15,7 @@ class VootIE(InfoExtractor): _VALID_URL = r'''(?x) (?: voot:| - (?:https?://)(?:www\.)?voot\.com/? + https?://(?:www\.)?voot\.com/? (?: movies/[^/]+/| (?:shows|kids)/(?:[^/]+/){4} diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 5366041674..462bc4efe6 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -21,7 +21,7 @@ class Zee5IE(InfoExtractor): _VALID_URL = r'''(?x) (?: zee5:| - (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)? + https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? (?: (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3} |movies/[^#/?]+ @@ -174,7 +174,7 @@ class Zee5SeriesIE(InfoExtractor): _VALID_URL = r'''(?x) (?: zee5:series:| - (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)? + https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/ ) (?P[^#/?]+)/?(?:$|[?#])