From d156bc8d59dd469bf70b822926504f213ce237de Mon Sep 17 00:00:00 2001 From: kikuyan Date: Thu, 17 Jun 2021 06:02:06 +0900 Subject: [PATCH 01/17] [orf:tvthek] Add support for MPD formats (closes #28672) (#29236) --- youtube_dl/extractor/orf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index ed8a9a841..8d537d7ae 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor): elif ext == 'f4m': formats.extend(self._extract_f4m_formats( src, video_id, f4m_id=format_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, From 8fe5d54eb721f1bbb8c8a0d18810a42d1257e406 Mon Sep 17 00:00:00 2001 From: kikuyan Date: Thu, 17 Jun 2021 06:12:13 +0900 Subject: [PATCH 02/17] [appleconnect] Fix extraction (#29208) --- youtube_dl/extractor/appleconnect.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py index a84b8b1eb..494f8330c 100644 --- a/youtube_dl/extractor/appleconnect.py +++ b/youtube_dl/extractor/appleconnect.py @@ -9,10 +9,10 @@ from ..utils import ( class AppleConnectIE(InfoExtractor): - _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P[\w-]+)' - _TEST = { + _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P[\w-]+)' + _TESTS = [{ 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', - 'md5': 'e7c38568a01ea45402570e6029206723', + 'md5': 'c1d41f72c8bcaf222e089434619316e4', 'info_dict': { 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'ext': 'm4v', @@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor): 'upload_date': '20150710', 'timestamp': 1436545535, }, - } + }, { + 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor): video_data = self._parse_json(video_json, video_id) timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) - like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count')) + like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None)) return { 'id': video_id, From a7f61feab2dbfc50a7ebe8b0ea390bd0e5edf77a Mon Sep 17 00:00:00 2001 From: kikuyan Date: Thu, 17 Jun 2021 12:34:33 +0900 Subject: [PATCH 03/17] [egghead] Add support for app.egghead.io (closes #28404) (#29303) Co-authored-by: Sergey M. --- youtube_dl/extractor/egghead.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index aff9b88c0..9bbd703e0 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor): class EggheadCourseIE(EggheadBaseIE): IE_DESC = 'egghead.io course' IE_NAME = 'egghead:course' - _VALID_URL = r'https://egghead\.io/courses/(?P[^/?#&]+)' - _TEST = { + _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P[^/?#&]+)' + _TESTS = [{ 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'playlist_count': 29, 'info_dict': { - 'id': '72', + 'id': '432655', 'title': 'Professor Frisby Introduces Composable Functional JavaScript', 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$', }, - } + }, { + 'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript', + 'only_matching': True, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE): class EggheadLessonIE(EggheadBaseIE): IE_DESC = 'egghead.io lesson' IE_NAME = 'egghead:lesson' - _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P[^/?#&]+)' + _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', 'info_dict': { @@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE): }, { 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application', 'only_matching': True, + }, { + 'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', + 'only_matching': True, }] def _real_extract(self, url): From 3a7ef27cf306a0a8f79ebd78ae60329c53080d14 Mon Sep 17 00:00:00 2001 From: kikuyan Date: Mon, 21 Jun 2021 01:58:19 +0900 Subject: [PATCH 04/17] [postprocessor/ffmpeg] Show ffmpeg output on error (refs #22680) (#29336) --- youtube_dl/postprocessor/ffmpeg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 5f7298345..9f76c9d4e 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -231,7 +231,10 @@ class FFmpegPostProcessor(PostProcessor): stdout, stderr = p.communicate() if p.returncode != 0: stderr = stderr.decode('utf-8', 'replace') - msg = stderr.strip().split('\n')[-1] + msgs = stderr.strip().split('\n') + msg = msgs[-1] + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] ' + '\n'.join(msgs[:-1])) raise FFmpegPostProcessorError(msg) self.try_utime(out_path, oldest_mtime, oldest_mtime) From 57b9a4b4c6cf2580b5007db78bd333a9a237fd47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Jun 2021 00:36:28 +0700 Subject: [PATCH 05/17] [nrk] Switch psapi URL to https (closes #29344) Catalog calls no longer work via http --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 40dee2162..6d01a25c3 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor): def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None): return self._download_json( - urljoin('http://psapi.nrk.no/', path), + urljoin('https://psapi.nrk.no/', path), video_id, note or 'Downloading %s JSON' % item, fatal=fatal, query=query, headers={'Accept-Encoding': 'gzip, deflate, br'}) From cc21aebe9071660ba558dae75c3066a4a3b38820 Mon Sep 17 00:00:00 2001 From: Logan B Date: Mon, 21 Jun 2021 05:41:14 +1200 Subject: [PATCH 06/17] [umg:de] Update GraphQL API URL (#29304) Previous one no longer resolves Co-authored-by: Sergey M. --- youtube_dl/extractor/umg.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/umg.py b/youtube_dl/extractor/umg.py index d815cd9a6..47948b6ce 100644 --- a/youtube_dl/extractor/umg.py +++ b/youtube_dl/extractor/umg.py @@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - 'https://api.universal-music.de/graphql', + 'https://graphql.universal-music.de/', video_id, query={ 'query': '''{ universalMusic(channel:16) { @@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor): formats = [] def add_m3u8_format(format_id): - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( hls_url_template % format_id, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal='False') - if m3u8_formats and m3u8_formats[0].get('height'): - formats.extend(m3u8_formats) + 'm3u8_native', m3u8_id='hls', fatal=False)) for f in video_data.get('formats', []): f_url = f.get('url') From 41317030017418c89742594a80c0596c4b26bbb9 Mon Sep 17 00:00:00 2001 From: bopol Date: Sun, 20 Jun 2021 19:42:09 +0200 Subject: [PATCH 07/17] [youtube] Update invidious instance list (#29281) --- youtube_dl/extractor/youtube.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bf858c39d..35058950a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -353,7 +353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'(?:www\.)?invidious\.13ad\.de', r'(?:www\.)?invidious\.mastodon\.host', r'(?:www\.)?invidious\.zapashcanon\.fr', - r'(?:www\.)?invidious\.kavin\.rocks', + r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks', r'(?:www\.)?invidious\.tinfoil-hat\.net', r'(?:www\.)?invidious\.himiko\.cloud', r'(?:www\.)?invidious\.reallyancient\.tech', @@ -380,6 +380,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'(?:www\.)?invidious\.toot\.koeln', r'(?:www\.)?invidious\.fdn\.fr', r'(?:www\.)?watch\.nettohikari\.com', + r'(?:www\.)?invidious\.namazso\.eu', + r'(?:www\.)?invidious\.silkky\.cloud', + r'(?:www\.)?invidious\.exonip\.de', + r'(?:www\.)?invidious\.riverside\.rocks', + r'(?:www\.)?invidious\.blamefran\.net', + r'(?:www\.)?invidious\.moomoo\.de', + r'(?:www\.)?ytb\.trom\.tf', + r'(?:www\.)?yt\.cyberhost\.uk', r'(?:www\.)?kgg2m7yk5aybusll\.onion', r'(?:www\.)?qklhadlycap4cnod\.onion', r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', @@ -388,6 +396,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', + r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion', + r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', + r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', + r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', ) _VALID_URL = r"""(?x)^ ( From 4c77a2e538fb23da116aaba0f51e314ef76feb68 Mon Sep 17 00:00:00 2001 From: Tianyi Shi Date: Sun, 20 Jun 2021 19:03:21 +0100 Subject: [PATCH 08/17] [bilibili] Strip uploader name (#29202) --- youtube_dl/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 589fdc1ce..bff6ea194 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -233,7 +233,7 @@ class BiliBiliIE(InfoExtractor): webpage) if uploader_mobj: info.update({ - 'uploader': uploader_mobj.group('name'), + 'uploader': uploader_mobj.group('name').strip(), 'uploader_id': uploader_mobj.group('id'), }) if not info.get('uploader'): From 03ab02730f77da5b7ad05ca78ff1624d8226ec5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Jun 2021 01:34:27 +0700 Subject: [PATCH 09/17] [youtube] Workaround for get_video_info request (refs #29333) See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544 --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 35058950a..e68214008 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1512,6 +1512,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'html5': 1, + # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544 + 'c': 'TVHTML5', + 'cver': '6.20180913', }, fatal=False)), lambda x: x['player_response'][0], compat_str) or '{}', video_id) From 47f2f2fbe9730b041b91451d17279216f311ffc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Jun 2021 01:35:21 +0700 Subject: [PATCH 10/17] [youtube] Make get_video_info processing more robust (closes #29333) --- youtube_dl/extractor/youtube.py | 35 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e68214008..dc4bd4a77 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1504,22 +1504,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): playability_status = player_response.get('playabilityStatus') or {} if playability_status.get('reason') == 'Sign in to confirm your age': - pr = self._parse_json(try_get(compat_parse_qs( - self._download_webpage( - base_url + 'get_video_info', video_id, - 'Refetching age-gated info webpage', - 'unable to download video info webpage', query={ - 'video_id': video_id, - 'eurl': 'https://youtube.googleapis.com/v/' + video_id, - 'html5': 1, - # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544 - 'c': 'TVHTML5', - 'cver': '6.20180913', - }, fatal=False)), - lambda x: x['player_response'][0], - compat_str) or '{}', video_id) - if pr: - player_response = pr + video_info = self._download_webpage( + base_url + 'get_video_info', video_id, + 'Refetching age-gated info webpage', + 'unable to download video info webpage', query={ + 'video_id': video_id, + 'eurl': 'https://youtube.googleapis.com/v/' + video_id, + 'html5': 1, + # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544 + 'c': 'TVHTML5', + 'cver': '6.20180913', + }, fatal=False) + if video_info: + pr = self._parse_json( + try_get( + compat_parse_qs(video_info), + lambda x: x['player_response'][0], compat_str) or '{}', + video_id, fatal=False) + if pr and isinstance(pr, dict): + player_response = pr trailer_video_id = try_get( playability_status, From 2ccee8db74c36eb1254cdffd4e691e56c0ce0724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Jun 2021 01:54:52 +0700 Subject: [PATCH 11/17] [curiositystream:collection] Extend _VALID_URL (closes #26326, closes #29117) --- youtube_dl/extractor/curiositystream.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py index ae64a07d7..48ff30432 100644 --- a/youtube_dl/extractor/curiositystream.py +++ b/youtube_dl/extractor/curiositystream.py @@ -145,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): IE_NAME = 'curiositystream:collection' - _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P\d+)' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P\d+)' _TESTS = [{ 'url': 'https://app.curiositystream.com/collection/2', 'info_dict': { @@ -157,6 +157,9 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): }, { 'url': 'https://curiositystream.com/series/2', 'only_matching': True, + }, { + 'url': 'https://curiositystream.com/collections/36', + 'only_matching': True, }] def _real_extract(self, url): From da32828208743c8012c8eea01780cbf9b3f60436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Jun 2021 03:22:37 +0700 Subject: [PATCH 12/17] [pornhub] Dismiss tbr extracted from download URLs (closes #28927) No longer reliable --- youtube_dl/extractor/pornhub.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 031454600..10516ee5a 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -408,17 +408,14 @@ class PornHubIE(PornHubBaseIE): format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) return - tbr = None - mobj = re.search(r'(?P\d+)[pP]?_(?P\d+)[kK]', format_url) - if mobj: - if not height: - height = int(mobj.group('height')) - tbr = int(mobj.group('tbr')) + if not height: + height = int_or_none(self._search_regex( + r'(?P\d+)[pP]?_\d+[kK]', format_url, 'height', + default=None)) formats.append({ 'url': format_url, 'format_id': '%dp' % height if height else None, 'height': height, - 'tbr': tbr, }) for video_url, height in video_urls: @@ -440,7 +437,8 @@ class PornHubIE(PornHubBaseIE): add_format(video_url, height) continue add_format(video_url) - self._sort_formats(formats) + self._sort_formats( + formats, field_preference=('height', 'width', 'fps', 'format_id')) video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', From 751c9ae39a0bb9c66eca888a12595624db00bf16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Jun 2021 03:33:43 +0700 Subject: [PATCH 13/17] [pornhub] Detect geo restriction --- youtube_dl/extractor/pornhub.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 10516ee5a..d74e69ed9 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -236,6 +236,10 @@ class PornHubIE(PornHubBaseIE): }, { 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3', 'only_matching': True, + }, { + # geo restricted + 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156', + 'only_matching': True, }] @staticmethod @@ -275,6 +279,11 @@ class PornHubIE(PornHubBaseIE): 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) + if any(re.search(p, webpage) for p in ( + r'class=["\']geoBlocked["\']', + r'>\s*This content is unavailable in your country')): + self.raise_geo_restricted() + # video_title from flashvars contains whitespace instead of non-ASCII (see # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. From cb668eb973b8f09152bb48e3b49a014d3cb72b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Jun 2021 04:08:15 +0700 Subject: [PATCH 14/17] [pornhub] Add support for pornhubthbh7ap3u.onion --- youtube_dl/extractor/pornhub.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index d74e69ed9..e2e1500ff 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -30,6 +30,7 @@ from ..utils import ( class PornHubBaseIE(InfoExtractor): _NETRC_MACHINE = 'pornhub' + _PORNHUB_HOST_RE = r'(?:(?Ppornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)' def _download_webpage_handle(self, *args, **kwargs): def dl(*args, **kwargs): @@ -122,11 +123,13 @@ class PornHubIE(PornHubBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:[^/]+\.)?(?Ppornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:[^/]+\.)? + %s + /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P[\da-z]+) - ''' + ''' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': 'a6391306d050e4547f62b3f485dd9ba9', @@ -240,6 +243,9 @@ class PornHubIE(PornHubBaseIE): # geo restricted 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156', 'only_matching': True, + }, { + 'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156', + 'only_matching': True, }] @staticmethod @@ -520,7 +526,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -549,6 +555,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE): # Same as before, multi page 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau', 'only_matching': True, + }, { + 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph', + 'only_matching': True, }] def _real_extract(self, url): @@ -624,7 +633,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub(?:premium)?\.(?:com|net|org))/(?P(?:[^/]+/)*[^/?#&]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -729,6 +738,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://de.pornhub.com/playlist/4667351', 'only_matching': True, + }, { + 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos', + 'only_matching': True, }] @classmethod @@ -739,7 +751,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE _TESTS = [{ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'info_dict': { @@ -749,4 +761,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'only_matching': True, + }, { + 'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload', + 'only_matching': True, }] From 379f52a4954013767219d25099cce9e0f9401961 Mon Sep 17 00:00:00 2001 From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com> Date: Mon, 21 Jun 2021 03:23:50 +0600 Subject: [PATCH 15/17] [liveleak] Remove extractor (closes #17625, closes #24222) (#29331) --- youtube_dl/extractor/extractors.py | 4 - youtube_dl/extractor/generic.py | 31 ----- youtube_dl/extractor/liveleak.py | 191 ----------------------------- 3 files changed, 226 deletions(-) delete mode 100644 youtube_dl/extractor/liveleak.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 402e542ae..6e8fc3961 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -610,10 +610,6 @@ from .linkedin import ( from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE from .livejournal import LiveJournalIE -from .liveleak import ( - LiveLeakIE, - LiveLeakEmbedIE, -) from .livestream import ( LivestreamIE, LivestreamOriginalIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 87594534f..a9c064105 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .arkena import ArkenaIE from .instagram import InstagramIE -from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE from .kaltura import KalturaIE @@ -1629,31 +1628,6 @@ class GenericIE(InfoExtractor): 'upload_date': '20160409', }, }, - # LiveLeak embed - { - 'url': 'http://www.wykop.pl/link/3088787/', - 'md5': '7619da8c820e835bef21a1efa2a0fc71', - 'info_dict': { - 'id': '874_1459135191', - 'ext': 'mp4', - 'title': 'Man shows poor quality of new apartment building', - 'description': 'The wall is like a sand pile.', - 'uploader': 'Lake8737', - }, - 'add_ie': [LiveLeakIE.ie_key()], - }, - # Another LiveLeak embed pattern (#13336) - { - 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/', - 'info_dict': { - 'id': '2eb_1496309988', - 'ext': 'mp4', - 'title': 'Thief robs place where everyone was armed', - 'description': 'md5:694d73ee79e535953cf2488562288eee', - 'uploader': 'brazilwtf', - }, - 'add_ie': [LiveLeakIE.ie_key()], - }, # Duplicated embedded video URLs { 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443', @@ -3179,11 +3153,6 @@ class GenericIE(InfoExtractor): return self.url_result( self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) - # Look for LiveLeak embeds - liveleak_urls = LiveLeakIE._extract_urls(webpage) - if liveleak_urls: - return self.playlist_from_matches(liveleak_urls, video_id, video_title) - # Look for 3Q SDN embeds threeqsdn_url = ThreeQSDNIE._extract_url(webpage) if threeqsdn_url: diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py deleted file mode 100644 index 4ac437c8b..000000000 --- a/youtube_dl/extractor/liveleak.py +++ /dev/null @@ -1,191 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class LiveLeakIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P[\w_]+)' - _TESTS = [{ - 'url': 'http://www.liveleak.com/view?i=757_1364311680', - 'md5': '0813c2430bea7a46bf13acf3406992f4', - 'info_dict': { - 'id': '757_1364311680', - 'ext': 'mp4', - 'description': 'extremely bad day for this guy..!', - 'uploader': 'ljfriel2', - 'title': 'Most unlucky car accident', - 'thumbnail': r're:^https?://.*\.jpg$' - } - }, { - 'url': 'http://www.liveleak.com/view?i=f93_1390833151', - 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf', - 'info_dict': { - 'id': 'f93_1390833151', - 'ext': 'mp4', - 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.', - 'uploader': 'ARD_Stinkt', - 'title': 'German Television does first Edward Snowden Interview (ENGLISH)', - 'thumbnail': r're:^https?://.*\.jpg$' - } - }, { - # Prochan embed - 'url': 'http://www.liveleak.com/view?i=4f7_1392687779', - 'md5': '42c6d97d54f1db107958760788c5f48f', - 'info_dict': { - 'id': '4f7_1392687779', - 'ext': 'mp4', - 'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.", - 'uploader': 'CapObveus', - 'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck', - 'age_limit': 18, - }, - 'skip': 'Video is dead', - }, { - # Covers https://github.com/ytdl-org/youtube-dl/pull/5983 - # Multiple resolutions - 'url': 'http://www.liveleak.com/view?i=801_1409392012', - 'md5': 'c3a449dbaca5c0d1825caecd52a57d7b', - 'info_dict': { - 'id': '801_1409392012', - 'ext': 'mp4', - 'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.', - 'uploader': 'bony333', - 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', - 'thumbnail': r're:^https?://.*\.jpg$' - } - }, { - # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521 - 'url': 'http://m.liveleak.com/view?i=763_1473349649', - 'add_ie': ['Youtube'], - 'info_dict': { - 'id': '763_1473349649', - 'ext': 'mp4', - 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty', - 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.', - 'uploader': 'Ziz', - 'upload_date': '20160908', - 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw' - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.liveleak.com/view?i=677_1439397581', - 'info_dict': { - 'id': '677_1439397581', - 'title': 'Fuel Depot in China Explosion caught on video', - }, - 'playlist_count': 3, - }, { - 'url': 'https://www.liveleak.com/view?t=HvHi_1523016227', - 'only_matching': True, - }, { - # No original video - 'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"', - webpage) - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip() - video_description = self._og_search_description(webpage) - video_uploader = self._html_search_regex( - r'By:.*?(\w+)', webpage, 'uploader', fatal=False) - age_limit = int_or_none(self._search_regex( - r'you confirm that you are ([0-9]+) years and over.', - webpage, 'age limit', default=None)) - video_thumbnail = self._og_search_thumbnail(webpage) - - entries = self._parse_html5_media_entries(url, webpage, video_id) - if not entries: - # Maybe an embed? - embed_url = self._search_regex( - r']+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"', - webpage, 'embed URL') - return { - '_type': 'url_transparent', - 'url': embed_url, - 'id': video_id, - 'title': video_title, - 'description': video_description, - 'uploader': video_uploader, - 'age_limit': age_limit, - } - - for idx, info_dict in enumerate(entries): - formats = [] - for a_format in info_dict['formats']: - if not a_format.get('height'): - a_format['height'] = int_or_none(self._search_regex( - r'([0-9]+)p\.mp4', a_format['url'], 'height label', - default=None)) - formats.append(a_format) - - # Removing '.*.mp4' gives the raw video, which is essentially - # the same video without the LiveLeak logo at the top (see - # https://github.com/ytdl-org/youtube-dl/pull/4768) - orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url']) - if a_format['url'] != orig_url: - format_id = a_format.get('format_id') - format_id = 'original' + ('-' + format_id if format_id else '') - if self._is_valid_url(orig_url, video_id, format_id): - formats.append({ - 'format_id': format_id, - 'url': orig_url, - 'preference': 1, - }) - self._sort_formats(formats) - info_dict['formats'] = formats - - # Don't append entry ID for one-video pages to keep backward compatibility - if len(entries) > 1: - info_dict['id'] = '%s_%s' % (video_id, idx + 1) - else: - info_dict['id'] = video_id - - info_dict.update({ - 'title': video_title, - 'description': video_description, - 'uploader': video_uploader, - 'age_limit': age_limit, - 'thumbnail': video_thumbnail, - }) - - return self.playlist_result(entries, video_id, video_title) - - -class LiveLeakEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P[ift])=(?P[\w_]+)' - - # See generic.py for actual test cases - _TESTS = [{ - 'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191', - 'only_matching': True, - }, { - 'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1', - 'only_matching': True, - }] - - def _real_extract(self, url): - kind, video_id = re.match(self._VALID_URL, url).groups() - - if kind == 'f': - webpage = self._download_webpage(url, video_id) - liveleak_url = self._search_regex( - r'(?:logourl\s*:\s*|window\.open\()(?P[\'"])(?P%s)(?P=q1)' % LiveLeakIE._VALID_URL, - webpage, 'LiveLeak URL', group='url') - else: - liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id) - - return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key()) From 7fb9564420d43252c8f8c453d4dc54bf3ff9f8ee Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Jun 2021 20:06:33 +0100 Subject: [PATCH 16/17] [periscope] pass referer to HLS requests(closes #29419) --- youtube_dl/extractor/periscope.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index b15906390..b93a02b7d 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -12,6 +12,10 @@ from ..utils import ( class PeriscopeBaseIE(InfoExtractor): + _M3U8_HEADERS = { + 'Referer': 'https://www.periscope.tv/' + } + def _call_api(self, method, query, item_id): return self._download_json( 'https://api.periscope.tv/api/v2/%s' % method, @@ -54,9 +58,11 @@ class PeriscopeBaseIE(InfoExtractor): m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native' if state in ('ended', 'timed_out') else 'm3u8', - m3u8_id=format_id, fatal=fatal) + m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS) if len(m3u8_formats) == 1: self._add_width_and_height(m3u8_formats[0], width, height) + for f in m3u8_formats: + f.setdefault('http_headers', {}).update(self._M3U8_HEADERS) return m3u8_formats From a8035827177d6b59aca03bd717acb6a9bdd75ada Mon Sep 17 00:00:00 2001 From: bopol Date: Thu, 1 Jul 2021 08:53:22 +0200 Subject: [PATCH 17/17] [peertube] only call description endpoint if necessary (#29383) --- youtube_dl/extractor/peertube.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index d9b13adc2..3af533925 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -569,15 +569,15 @@ class PeerTubeIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - full_description = self._call_api( - host, video_id, 'description', note='Downloading description JSON', - fatal=False) + description = video.get('description') + if len(description) >= 250: + # description is shortened + full_description = self._call_api( + host, video_id, 'description', note='Downloading description JSON', + fatal=False) - description = None - if isinstance(full_description, dict): - description = str_or_none(full_description.get('description')) - if not description: - description = video.get('description') + if isinstance(full_description, dict): + description = str_or_none(full_description.get('description')) or description subtitles = self.extract_subtitles(host, video_id)