From 163281178a61565cd592426d452978ff47e63439 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 21 Sep 2022 20:53:08 +0000 Subject: [PATCH] [extractor/wistia] Match IDs in embed URLs (#4990) Closes #4985 Authored by: bashonly --- yt_dlp/extractor/generic.py | 35 +++++++++++++++++++++++++---------- yt_dlp/extractor/wistia.py | 16 ++++++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 828c8a6cff..fadc0819b0 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -876,17 +876,19 @@ class GenericIE(InfoExtractor): # Wistia embed { 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', - 'md5': '1953f3a698ab51cfc948ed3992a0b7ff', + 'md5': 'b9676d24bf30945d97060638fbfe77f0', 'info_dict': { - 'id': '6e2wtrbdaf', - 'ext': 'mov', - 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', - 'description': 'a Paywall Videos video from Remilon', - 'duration': 644.072, + 'id': '5vd7p4bct5', + 'ext': 'bin', + 'title': 'md5:db27290a04ae306319b0b5cce3cdf7bd', + 'description': 'md5:e835b7808e11aaef29ccdc28888437af', + 'duration': 623.019, 'uploader': 'study.com', - 'timestamp': 1459678540, - 'upload_date': '20160403', - 'filesize': 24687186, + 'timestamp': 1663258727, + 'upload_date': '20220915', + 'filesize': 29798093, + 'age_limit': 0, + 'thumbnail': r're:^https?://.+\.jpg$', }, }, # Wistia standard embed (async) @@ -903,7 +905,20 @@ class GenericIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, + 'skip': 'webpage 404 not found', + }, + # Wistia embed with video IDs in query + { + 'url': 'https://amplitude.com/amplify-sessions?amp%5Bwmediaid%5D=pz0m0l0if3&%5Bwvideo%5D=pz0m0l0if3&wchannelid=emyjmwjf79&wmediaid=i8um783bdt', + 'info_dict': { + 'id': 'md5:922795280019b3a70ca133330a4b0108', + 'title': 'Amplify Sessions 
- Amplitude', + 'description': 'md5:3d271bdee219417bb1c35eeb0937b923', + 'age_limit': 0, + 'thumbnail': r're:^https?://.+\.jpg$', + }, + 'playlist_count': 3, }, # Soundcloud embed { diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index 4388286241..ba7497493f 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -131,6 +131,20 @@ class WistiaIE(WistiaBaseIE): 'timestamp': 1463607249, 'duration': 4987.11, }, + 'skip': 'webpage 404 not found', + }, { + 'url': 'wistia:5vd7p4bct5', + 'md5': 'b9676d24bf30945d97060638fbfe77f0', + 'info_dict': { + 'id': '5vd7p4bct5', + 'ext': 'bin', + 'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679', + 'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f', + 'upload_date': '20220915', + 'timestamp': 1663258727, + 'duration': 623.019, + 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.(?:jpg|bin)$', + }, }, { 'url': 'wistia:sh7fpupwlt', 'only_matching': True, @@ -157,6 +171,8 @@ def _extract_embed_urls(cls, url, webpage): urls.append('wistia:%s' % match.group('id')) for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage): urls.append('wistia:%s' % match.group('id')) + for match in re.finditer(r'(?:wmediaid|wvideo(?:id)?)(?:%5D)?=(?P<id>[a-z0-9]{10})', url): + urls.append('wistia:%s' % match.group('id')) return urls @classmethod