From 98a3cb082314e182809be7c0c096f753dcc28604 Mon Sep 17 00:00:00 2001 From: pikadoramon Date: Wed, 28 Jun 2023 19:42:33 +0800 Subject: [PATCH 1/6] [extractor/JdItemVideo] Add Extractor. To extract the video links from a JD.com product page --- yt_dlp/extractor/_extractors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 06340fcd8d..296e17c68b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -879,6 +879,9 @@ SangiinInstructionIE, SangiinIE, ) + +from .jditemvideo import JdItemVideoIE + from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .joj import JojIE From 314fce0c430b186426fabc9cc89d96f066943e6d Mon Sep 17 00:00:00 2001 From: pikadoramon Date: Wed, 28 Jun 2023 19:43:32 +0800 Subject: [PATCH 2/6] [extractor/JdItemVideo] Add Extractor. To extract the video links from a JD.com product page --- yt_dlp/extractor/jditemvideo.py | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 yt_dlp/extractor/jditemvideo.py diff --git a/yt_dlp/extractor/jditemvideo.py b/yt_dlp/extractor/jditemvideo.py new file mode 100644 index 0000000000..b4c008beed --- /dev/null +++ b/yt_dlp/extractor/jditemvideo.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +import json +import random +import time + +from .common import InfoExtractor +from ..utils import determine_ext + + +class JdItemVideoIE(InfoExtractor): + _VALID_URL = r"https://.+.jd.[a-z\.]{2,9}/(?P\d{6,16}).html" + + IE_NAME = 'jd-video' + IE_DESC = 'jd-video extractor' + _NETRC_MACHINE = False + + _JD_API_VIDEO_CALLBACK_URL = 'https://cd.jd.com/tencent/video_v3?callback=jQuery{rand}&vid={video_id}&type=1&from=1&appid=24&_={timestamp}' + + _TESTS = [ + { + 'url': 'https://npcitem.jd.hk/100030101538.html', + 'info_dict': { + "id": "100030101538", + "ext": "mp4", + "title": "ipad 2021第九代", + "description": "【AppleiPad】Apple苹果 iPad 第9代 10.2英寸平板电脑 2021款 ipad9(64GB WLAN版/A13芯片/1200万像素/iPadOS)深空灰色【行情 报价 价格 评测】-京东", + "size": 10251794, + "width": 1280, + "height": 1280, + "duration": 56, + "thumbnail": "https://jvod.300hu.com/img/2022/130871763/1/img7.jpg", + "url": "https://jvod.300hu.com/vod/product/6e02e2d8-98bc-491d-80a1-448ae5ea1c38/c6ef7b9b14ef4b9ca7e4cebda5b7684c.mp4?source=2&h265=h265/18799/a797504bd6f947dfbf6fdb96acfbb55f.mp4", + }, + }, + { + 'url': 'https://npcitem.jd.hk/100030101538.html', + 'info_dict': { + "id": "100037516759", + "ext": "mp4", + "title": "RODE Wireless Go II Dual", + "description": "【RODEWireless Go II Dual】罗德(RODE)Wireless Go II Dual无线领夹麦克风单反手机无线小蜜蜂采访直播vlog收音 一拖二2代 标配【行情 报价 价格 评测】-京东", + "size": 7547769, + "width": 1280, + "height": 720, + "duration": 60, + "thumbnail": "https://jvod.300hu.com/img/2022/219535842/1/img7.jpg", + "url": "https://jvod.300hu.com/vod/product/1fc0661d-546e-446e-a429-a8db696ab06a/4067f4c3bb2d41c5af84081d2b0e3018.mp4?source=2&h265=h265/113074/cf365c28ca3a4fdb8178c4e44f916341.mp4", + }, + }, + ] + + def _real_extract(self, url): + + item_id = self._match_id(url=url) + resp = self._download_webpage(url_or_request=url, video_id=item_id) + pattern_data = self._html_search_regex(pattern=r'"mainVideoId":"(\d+?)"', string=resp, name='videoId', + default=None) + if pattern_data is None: + raise ValueError( + "There are no any video. %s" % url + ) + + + description = self._html_extract_title(resp) + rand = random.randint(433333, 999999) + timestamp = int(time.time() * 1000) + url = self._JD_API_VIDEO_CALLBACK_URL.format(rand=rand, timestamp=timestamp, video_id=pattern_data) + mp4resp = self._download_webpage( + url_or_request=url, + video_id=item_id + ) + detailResp = self._html_search_regex(pattern=r'jQuery\d+\((.+)\)', string=mp4resp, name='detail', default=None) + if detailResp is None: + raise ValueError( + "Callback fail. return: %s" % detailResp + ) + + detailRespJson = json.loads(detailResp) + if detailRespJson.get("code", -1) != 0: + raise ValueError( + "Callback fail. return: %s" % detailResp + ) + + ext = determine_ext(url=detailRespJson.get("playUrl", "")) + + info_dict = { + 'id': item_id, + 'ext': ext, + 'title': detailRespJson.get("extInfo", {}).get("videoName") or "unknown_video_title", + 'description': description, + 'size': detailRespJson.get("extInfo", {}).get("size"), + 'width': detailRespJson.get("extInfo", {}).get("vwidth"), + 'height': detailRespJson.get("extInfo", {}).get("vheight"), + 'duration': detailRespJson.get("duration"), + 'thumbnail': detailRespJson.get("imageUrl"), + 'url': detailRespJson.get("playUrl") + } + return info_dict From 0d917bba3fbb2be70ce13df0f473a8a536f58ce2 Mon Sep 17 00:00:00 2001 From: pikadoramon Date: Wed, 28 Jun 2023 20:17:05 +0800 Subject: [PATCH 3/6] [extractor/JdItemVideo] Add Extractor. To extract the video links from a JD.com product page --- yt_dlp/extractor/jditemvideo.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/jditemvideo.py b/yt_dlp/extractor/jditemvideo.py index b4c008beed..8b5d917416 100644 --- a/yt_dlp/extractor/jditemvideo.py +++ b/yt_dlp/extractor/jditemvideo.py @@ -50,36 +50,25 @@ class JdItemVideoIE(InfoExtractor): ] def _real_extract(self, url): - item_id = self._match_id(url=url) resp = self._download_webpage(url_or_request=url, video_id=item_id) pattern_data = self._html_search_regex(pattern=r'"mainVideoId":"(\d+?)"', string=resp, name='videoId', default=None) if pattern_data is None: - raise ValueError( - "There are no any video. %s" % url - ) - + raise ValueError("There are no any video. %s" % url) description = self._html_extract_title(resp) rand = random.randint(433333, 999999) timestamp = int(time.time() * 1000) url = self._JD_API_VIDEO_CALLBACK_URL.format(rand=rand, timestamp=timestamp, video_id=pattern_data) - mp4resp = self._download_webpage( - url_or_request=url, - video_id=item_id - ) + mp4resp = self._download_webpage(url_or_request=url, video_id=item_id) detailResp = self._html_search_regex(pattern=r'jQuery\d+\((.+)\)', string=mp4resp, name='detail', default=None) if detailResp is None: - raise ValueError( - "Callback fail. return: %s" % detailResp - ) + raise ValueError("Callback fail. return: %s" % detailResp) detailRespJson = json.loads(detailResp) if detailRespJson.get("code", -1) != 0: - raise ValueError( - "Callback fail. return: %s" % detailResp - ) + raise ValueError("Callback fail. return: %s" % detailResp) ext = determine_ext(url=detailRespJson.get("playUrl", "")) From b7f94ef67ad5c228c1da5533489a4ce02a8c96dc Mon Sep 17 00:00:00 2001 From: zhangzhanming Date: Wed, 28 Jun 2023 20:29:54 +0800 Subject: [PATCH 4/6] [extractor/JdItemVideo] Add Extractor. To extract the video links from a JD.com product page --- yt_dlp/extractor/jditemvideo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/jditemvideo.py b/yt_dlp/extractor/jditemvideo.py index 8b5d917416..18f385ddd8 100644 --- a/yt_dlp/extractor/jditemvideo.py +++ b/yt_dlp/extractor/jditemvideo.py @@ -4,7 +4,7 @@ import time from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import determine_ext, ExtractorError class JdItemVideoIE(InfoExtractor): @@ -55,20 +55,20 @@ def _real_extract(self, url): pattern_data = self._html_search_regex(pattern=r'"mainVideoId":"(\d+?)"', string=resp, name='videoId', default=None) if pattern_data is None: - raise ValueError("There are no any video. %s" % url) + raise ExtractorError("There are no any video. %s" % url) description = self._html_extract_title(resp) rand = random.randint(433333, 999999) timestamp = int(time.time() * 1000) url = self._JD_API_VIDEO_CALLBACK_URL.format(rand=rand, timestamp=timestamp, video_id=pattern_data) mp4resp = self._download_webpage(url_or_request=url, video_id=item_id) - detailResp = self._html_search_regex(pattern=r'jQuery\d+\((.+)\)', string=mp4resp, name='detail', default=None) + detailResp = self._html_search_regex(pattern=r'jQuery\d+\((.+)\)', string=mp4resp, name='detail') if detailResp is None: - raise ValueError("Callback fail. return: %s" % detailResp) + raise ExtractorError("Callback fail. return: %s" % detailResp) detailRespJson = json.loads(detailResp) if detailRespJson.get("code", -1) != 0: - raise ValueError("Callback fail. return: %s" % detailResp) + raise ExtractorError("Callback fail. return: %s" % detailResp) ext = determine_ext(url=detailRespJson.get("playUrl", "")) From 48e821ab165ce3be525fd007a26427162a9fbb74 Mon Sep 17 00:00:00 2001 From: zhangzhanming Date: Wed, 28 Jun 2023 21:12:55 +0800 Subject: [PATCH 5/6] [extractor/JdItemVideo] Add Extractor. fix some problems --- yt_dlp/extractor/jditemvideo.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/jditemvideo.py b/yt_dlp/extractor/jditemvideo.py index 18f385ddd8..a04c858a45 100644 --- a/yt_dlp/extractor/jditemvideo.py +++ b/yt_dlp/extractor/jditemvideo.py @@ -4,7 +4,7 @@ import time from .common import InfoExtractor -from ..utils import determine_ext, ExtractorError +from ..utils import determine_ext, ExtractorError, traverse_obj class JdItemVideoIE(InfoExtractor): @@ -52,8 +52,7 @@ class JdItemVideoIE(InfoExtractor): def _real_extract(self, url): item_id = self._match_id(url=url) resp = self._download_webpage(url_or_request=url, video_id=item_id) - pattern_data = self._html_search_regex(pattern=r'"mainVideoId":"(\d+?)"', string=resp, name='videoId', - default=None) + pattern_data = self._html_search_regex(pattern=r'"mainVideoId":"(\d+?)"', string=resp, name='videoId') if pattern_data is None: raise ExtractorError("There are no any video. %s" % url) @@ -75,11 +74,11 @@ def _real_extract(self, url): info_dict = { 'id': item_id, 'ext': ext, - 'title': detailRespJson.get("extInfo", {}).get("videoName") or "unknown_video_title", + 'title': traverse_obj(detailRespJson, ('extInfo', 'videoName'), default="unknown_video_title"), 'description': description, - 'size': detailRespJson.get("extInfo", {}).get("size"), - 'width': detailRespJson.get("extInfo", {}).get("vwidth"), - 'height': detailRespJson.get("extInfo", {}).get("vheight"), + 'size': traverse_obj(detailRespJson, ("extInfo","size")), + 'width': traverse_obj(detailRespJson, ("extInfo", "vwidth")), + 'height': traverse_obj(detailRespJson, ("extInfo", "vheight")), 'duration': detailRespJson.get("duration"), 'thumbnail': detailRespJson.get("imageUrl"), 'url': detailRespJson.get("playUrl") From 75432034fea31f6ebbe0f7227aaa0c97cdb2d21f Mon Sep 17 00:00:00 2001 From: zhangzhanming Date: Wed, 28 Jun 2023 21:15:40 +0800 Subject: [PATCH 6/6] [extractor/JdItemVideo] Add Extractor. fix some problems --- yt_dlp/extractor/jditemvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/jditemvideo.py b/yt_dlp/extractor/jditemvideo.py index a04c858a45..daa9532e44 100644 --- a/yt_dlp/extractor/jditemvideo.py +++ b/yt_dlp/extractor/jditemvideo.py @@ -76,7 +76,7 @@ def _real_extract(self, url): 'ext': ext, 'title': traverse_obj(detailRespJson, ('extInfo', 'videoName'), default="unknown_video_title"), 'description': description, - 'size': traverse_obj(detailRespJson, ("extInfo","size")), + 'size': traverse_obj(detailRespJson, ("extInfo", "size")), 'width': traverse_obj(detailRespJson, ("extInfo", "vwidth")), 'height': traverse_obj(detailRespJson, ("extInfo", "vheight")), 'duration': detailRespJson.get("duration"),