From 92ad6defacc200ebc3054058867398d83da291a9 Mon Sep 17 00:00:00 2001 From: ndyanx Date: Tue, 13 Aug 2024 07:50:58 -0500 Subject: [PATCH 1/4] dropbox supported password,thumbnail --- yt_dlp/extractor/dropbox.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index 51b40df428..403dd75afa 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -42,18 +42,24 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) fn = urllib.parse.unquote(url_basename(url)) title = os.path.splitext(fn)[0] - password = self.get_param('videopassword') + thumbnail = None + + for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)): + part = base64.b64decode(encoded).decode('utf-8', 'ignore') + if 'sm/password' in part: + webpage = self._download_webpage( + 'https://www.dropbox.com/sm/password?' + part.split('?')[1], video_id) + if (self._og_search_title(webpage) == 'Dropbox - Password Required' or 'Enter the password for this link' in webpage): if password: content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id') - payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}' + payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url.replace("https://www.dropbox.com", "")}' response = self._download_json( 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(), headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}) - if response.get('status') != 'authed': raise ExtractorError('Authentication failed!', expected=True) webpage = self._download_webpage(url, video_id) @@ -73,6 +79,8 @@ def _real_extract(self, url): if not transcode_url: continue formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4') + thumbnail = self._search_regex( + r'(https://www\.dropbox\.com/temp_thumb_from_token/c/(.*?)\?preserve_transparency=False&rlkey=(.*?)&secure_hash=&size=(.*?)&size_mode=4)', decoded, 'thumbnail url', default=None) break # downloads enabled we can get the original file @@ -86,6 +94,7 @@ def _real_extract(self, url): return { 'id': video_id, + 'thumbnail': thumbnail, 'title': title, 'formats': formats, 'subtitles': subtitles, From 2e703affc850c749d15a648ee371d7c8e685319e Mon Sep 17 00:00:00 2001 From: ndyanx Date: Tue, 13 Aug 2024 14:32:09 -0500 Subject: [PATCH 2/4] [ie/YandexDisk] supported password, thumbnail --- yt_dlp/extractor/yandexdisk.py | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index 3214816701..a14bd8da70 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -1,12 +1,17 @@ import json +import urllib.parse + +from html import unescape from .common import InfoExtractor from ..utils import ( determine_ext, + ExtractorError, float_or_none, int_or_none, join_nonempty, mimetype2ext, + traverse_obj, try_get, urljoin, ) @@ -62,6 +67,43 @@ def _real_extract(self, url): webpage, 'store'), video_id) resource = store['resources'][store['rootResourceId']] + if store['rootResourceId'] == 'password-protected': + data = { + "hash": resource['hash'], + "password": self.get_param('videopassword', default=''), + "sk": traverse_obj(store, ('environment', 'sk')), + } + json_string = json.dumps(data, separators=(',', ':')) + url_encoded_string = urllib.parse.quote(json_string, safe='') + data_bytes = url_encoded_string.encode('utf-8') + token = (self._download_json( + 'https://disk.yandex.ru/public/api/check-password', + video_id, data=data_bytes, fatal=False, + headers={ + 'Accept': '*/*', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Content-Type': 'text/plain', + 'Origin': 'https://disk.yandex.ru', + 'Pragma': 'no-cache', + 'Referer': url, + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-origin', + 'X-Requested-With': 'XMLHttpRequest', + 'X-Retpath-Y': url, + } + ) or {}).get('token') or {} + if not token: + raise ExtractorError('Password incorrect!', expected=True) + self._set_cookie('disk.yandex.ru', name='passToken', value=token) + webpage = self._download_webpage(url, video_id) + store = self._parse_json(self._search_regex( + r']+id="store-prefetch"[^>]*>\s*({.+?})\s*', + webpage, 'store'), video_id) + resource = store['resources'][store['rootResourceId']] + + thumbnail = unescape(self._og_search_property('image', webpage)) title = resource['name'] meta = resource.get('meta') or {} @@ -132,6 +174,7 @@ def call_api(action): return { 'id': video_id, 'title': title, + 'thumbnail': thumbnail, 'duration': float_or_none(video_streams.get('duration'), 1000), 'uploader': display_name, 'uploader_id': uid, From 56c99e87cbb8db1410f5a4de6f111926eead9b9a Mon Sep 17 00:00:00 2001 From: ndyanx Date: Tue, 13 Aug 2024 14:54:00 -0500 Subject: [PATCH 3/4] [ie/dropbox] supported password,thumbnail --- yt_dlp/extractor/yandexdisk.py | 43 ---------------------------------- 1 file changed, 43 deletions(-) diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index a14bd8da70..3214816701 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -1,17 +1,12 @@ import json -import urllib.parse - -from html import unescape from .common import InfoExtractor from ..utils import ( determine_ext, - ExtractorError, float_or_none, int_or_none, join_nonempty, mimetype2ext, - traverse_obj, try_get, urljoin, ) @@ -67,43 +62,6 @@ def _real_extract(self, url): webpage, 'store'), video_id) resource = store['resources'][store['rootResourceId']] - if store['rootResourceId'] == 'password-protected': - data = { - "hash": resource['hash'], - "password": self.get_param('videopassword', default=''), - "sk": traverse_obj(store, ('environment', 'sk')), - } - json_string = json.dumps(data, separators=(',', ':')) - url_encoded_string = urllib.parse.quote(json_string, safe='') - data_bytes = url_encoded_string.encode('utf-8') - token = (self._download_json( - 'https://disk.yandex.ru/public/api/check-password', - video_id, data=data_bytes, fatal=False, - headers={ - 'Accept': '*/*', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Content-Type': 'text/plain', - 'Origin': 'https://disk.yandex.ru', - 'Pragma': 'no-cache', - 'Referer': url, - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'same-origin', - 'X-Requested-With': 'XMLHttpRequest', - 'X-Retpath-Y': url, - } - ) or {}).get('token') or {} - if not token: - raise ExtractorError('Password incorrect!', expected=True) - self._set_cookie('disk.yandex.ru', name='passToken', value=token) - webpage = self._download_webpage(url, video_id) - store = self._parse_json(self._search_regex( - r']+id="store-prefetch"[^>]*>\s*({.+?})\s*', - webpage, 'store'), video_id) - resource = store['resources'][store['rootResourceId']] - - thumbnail = unescape(self._og_search_property('image', webpage)) title = resource['name'] meta = resource.get('meta') or {} @@ -174,7 +132,6 @@ def call_api(action): return { 'id': video_id, 'title': title, - 'thumbnail': thumbnail, 'duration': float_or_none(video_streams.get('duration'), 1000), 'uploader': display_name, 'uploader_id': uid, From 3c7affb938050b7a476c209a9ee195f132b02207 Mon Sep 17 00:00:00 2001 From: ndyanx Date: Tue, 13 Aug 2024 15:12:31 -0500 Subject: [PATCH 4/4] [ie/yandexdisk] supported password,thumbnail --- yt_dlp/extractor/dropbox.py | 15 +++----------- yt_dlp/extractor/yandexdisk.py | 38 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index 403dd75afa..51b40df428 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -42,24 +42,18 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) fn = urllib.parse.unquote(url_basename(url)) title = os.path.splitext(fn)[0] + password = self.get_param('videopassword') - thumbnail = None - - for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)): - part = base64.b64decode(encoded).decode('utf-8', 'ignore') - if 'sm/password' in part: - webpage = self._download_webpage( - 'https://www.dropbox.com/sm/password?' + part.split('?')[1], video_id) - if (self._og_search_title(webpage) == 'Dropbox - Password Required' or 'Enter the password for this link' in webpage): if password: content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id') - payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url.replace("https://www.dropbox.com", "")}' + payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}' response = self._download_json( 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(), headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}) + if response.get('status') != 'authed': raise ExtractorError('Authentication failed!', expected=True) webpage = self._download_webpage(url, video_id) @@ -79,8 +73,6 @@ def _real_extract(self, url): if not transcode_url: continue formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4') - thumbnail = self._search_regex( - r'(https://www\.dropbox\.com/temp_thumb_from_token/c/(.*?)\?preserve_transparency=False&rlkey=(.*?)&secure_hash=&size=(.*?)&size_mode=4)', decoded, 'thumbnail url', default=None) break # downloads enabled we can get the original file @@ -94,7 +86,6 @@ def _real_extract(self, url): return { 'id': video_id, - 'thumbnail': thumbnail, 'title': title, 'formats': formats, 'subtitles': subtitles, diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index 3214816701..fa639bb06a 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -1,12 +1,15 @@ import json +import urllib.parse from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, float_or_none, int_or_none, join_nonempty, mimetype2ext, + traverse_obj, try_get, urljoin, ) @@ -62,6 +65,40 @@ def _real_extract(self, url): webpage, 'store'), video_id) resource = store['resources'][store['rootResourceId']] + if store['rootResourceId'] == 'password-protected': + data = { + 'hash': resource['hash'], + 'password': self.get_param('videopassword', default=''), + 'sk': traverse_obj(store, ('environment', 'sk'))} + json_string = json.dumps(data, separators=(',', ':')) + url_encoded_string = urllib.parse.quote(json_string, safe='') + data_bytes = url_encoded_string.encode('utf-8') + token = (self._download_json( + 'https://disk.yandex.ru/public/api/check-password', + video_id, data=data_bytes, fatal=False, + headers={ + 'Accept': '*/*', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Content-Type': 'text/plain', + 'Origin': 'https://disk.yandex.ru', + 'Pragma': 'no-cache', + 'Referer': url, + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-origin', + 'X-Requested-With': 'XMLHttpRequest', + 'X-Retpath-Y': url}) or {}).get('token') or {} + if not token: + raise ExtractorError('Password incorrect!', expected=True) + self._set_cookie('disk.yandex.ru', name='passToken', value=token) + webpage = self._download_webpage(url, video_id) + store = self._parse_json(self._search_regex( + r']+id="store-prefetch"[^>]*>\s*({.+?})\s*', + webpage, 'store'), video_id) + resource = store['resources'][store['rootResourceId']] + + thumbnail = self._og_search_property('image', webpage) title = resource['name'] meta = resource.get('meta') or {} @@ -132,6 +169,7 @@ def call_api(action): return { 'id': video_id, 'title': title, + 'thumbnail': thumbnail, 'duration': float_or_none(video_streams.get('duration'), 1000), 'uploader': display_name, 'uploader_id': uid,