From d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 4 Oct 2022 08:37:48 +1300 Subject: [PATCH] [extractor/JWPlatform] Fix extractor (#5112) Fix bitrate and filesize extraction and support embeds with unquoted urls. Related: #5106 Authored by: coletdjnz --- yt_dlp/extractor/common.py | 3 ++- yt_dlp/extractor/generic.py | 12 ------------ yt_dlp/extractor/jwplatform.py | 31 ++++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index caec0ccf62..0700b4767b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3587,7 +3587,8 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, 'url': source_url, 'width': int_or_none(source.get('width')), 'height': height, - 'tbr': int_or_none(source.get('bitrate')), + 'tbr': int_or_none(source.get('bitrate'), scale=1000), + 'filesize': int_or_none(source.get('filesize')), 'ext': ext, } if source_url.startswith('rtmp'): diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 73aefc7829..73422f937c 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1071,18 +1071,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, - { - # JWPlatform iframe - 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved', - 'info_dict': { - 'id': 'AG26UQXM', - 'ext': 'mp4', - 'upload_date': '20160719', - 'timestamp': 468923808, - 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4', - }, - 'add_ie': ['JWPlatform'], - }, { # Video.js embed, multiple formats 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html', diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py index d6b8420a87..c949689430 100644 --- a/yt_dlp/extractor/jwplatform.py +++ b/yt_dlp/extractor/jwplatform.py @@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor): 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # JWPlatform iframe + 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved', + 'info_dict': { + 'id': 'AG26UQXM', + 'ext': 'mp4', + 'upload_date': '20160719', + 'timestamp': 1468923808, + 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720', + 'description': '', + 'duration': 294.0, + }, + }, { + # Player url not surrounded by quotes + 'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin', + 'info_dict': { + 'id': 'R10NQdhY', + 'title': 'Playgirl', + 'ext': 'mp4', + 'upload_date': '20220624', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720', + 'timestamp': 1656064800, + 'description': 'BRD 1966, Will Tremper', + 'duration': 5146.0, + }, + 'params': {'allowed_extractors': ['generic', 'jwplatform']}, + }] + @classmethod def _extract_embed_urls(cls, url, webpage): for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')): # is used by hyland.com # if we find