From 905672d37861262d808d5195c4c2640adba9f3d3 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 25 Aug 2024 17:36:15 +0000 Subject: [PATCH 1/5] [ie/aparat:playlist] Add extractor --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/aparat.py | 69 +++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9b73fcd75e..b031db02ab 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -135,7 +135,7 @@ from .anvato import AnvatoIE from .aol import AolIE from .apa import APAIE -from .aparat import AparatIE +from .aparat import AparatIE, AparatPlaylistIE from .appleconnect import AppleConnectIE from .applepodcasts import ApplePodcastsIE from .appletrailers import ( diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 4a989d837b..84108513ee 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -1,9 +1,12 @@ +import urllib.parse + from .common import InfoExtractor from ..utils import ( get_element_by_id, int_or_none, merge_dicts, mimetype2ext, + traverse_obj, url_or_none, ) @@ -86,3 +89,69 @@ def _real_extract(self, url): 'duration': int_or_none(options.get('duration')), 'formats': formats, }) + + +class AparatPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?aparat\.com/playlist/(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://www.aparat.com/playlist/1001307', + 'info_dict': { + 'id': '1001307', + 'title': 'مبانی یادگیری عمیق', + 'description': '', + 'thumbnails': 'count:2', + 'channel': 'mrmohammadi_iust', + 'channel_id': '6463423', + 'channel_url': 'https://www.aparat.com/mrmohammadi_iust', + 'channel_follower_count': int, + }, + 'playlist_mincount': 1, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.aparat.com/playlist/1234567', + 'info_dict': { + 'id': '1234567', + 'title': 'ساخت اکانت', + 'description': 
'', + 'thumbnails': 'count:0', + 'channel': 'reza.shadow', + 'channel_id': '8159952', + 'channel_url': 'https://www.aparat.com/reza.shadow', + 'channel_follower_count': int, + }, + 'playlist_count': 0, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.aparat.com/playlist/1256882', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + info = self._download_json( + f'https://www.aparat.com/api/fa/v1/video/playlist/one/playlist_id/{playlist_id}', playlist_id) + + info_dict = traverse_obj(info, ('data', 'attributes', { + 'playlist_title': ('title'), + 'description': ('description'), + }), default={}) + info_dict.update(thumbnails=traverse_obj([ + traverse_obj(info, ('data', 'attributes', {'url': ('big_poster', {url_or_none})})), + traverse_obj(info, ('data', 'attributes', {'url': ('small_poster', {url_or_none})})), + ], (...), default=[])) + info_dict.update(**traverse_obj(info, ('included', lambda _, v: v['type'] == 'channel', 'attributes', { + 'channel': ('username'), + 'channel_id': ('id'), + 'channel_url': ('link', {lambda x: urllib.parse.urljoin(url, x)}), # starts with a slash + 'channel_follower_count': ('follower_cnt', {int_or_none}), + }), get_all=False)) + + return self.playlist_result(traverse_obj(info, ( + 'included', lambda _, v: v['type'] == 'Video', 'attributes', 'uid', + {lambda uid: self.url_result(f'https://www.aparat.com/v/{uid}?playlist={playlist_id}')}, + ), default=[]), playlist_id, **info_dict) From 322acaef8a603a54b20b6db426c90b9ff8d165f5 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:25:24 +0000 Subject: [PATCH 2/5] fix code style in _extractors.py Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b031db02ab..9bc0d92918 
100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -135,7 +135,10 @@ from .anvato import AnvatoIE from .aol import AolIE from .apa import APAIE -from .aparat import AparatIE, AparatPlaylistIE +from .aparat import ( + AparatIE, + AparatPlaylistIE, +) from .appleconnect import AppleConnectIE from .applepodcasts import ApplePodcastsIE from .appletrailers import ( From 1db3eaa77e61e19d0ec66f2326f21d5bf25f91f8 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 16 Nov 2024 07:17:48 +0000 Subject: [PATCH 3/5] simplify function call in traversal --- yt_dlp/extractor/aparat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 84108513ee..a6dbf2b8f1 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -1,5 +1,3 @@ -import urllib.parse - from .common import InfoExtractor from ..utils import ( get_element_by_id, @@ -8,6 +6,7 @@ mimetype2ext, traverse_obj, url_or_none, + urljoin, ) @@ -147,7 +146,7 @@ def _real_extract(self, url): info_dict.update(**traverse_obj(info, ('included', lambda _, v: v['type'] == 'channel', 'attributes', { 'channel': ('username'), 'channel_id': ('id'), - 'channel_url': ('link', {lambda x: urllib.parse.urljoin(url, x)}), # starts with a slash + 'channel_url': ('link', filter, {urljoin(base=url)}), # starts with a slash 'channel_follower_count': ('follower_cnt', {int_or_none}), }), get_all=False)) From dc2c2f1267a966cfb94537970258a99d0efd2b0a Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 16 Nov 2024 07:46:11 +0000 Subject: [PATCH 4/5] imports --- yt_dlp/extractor/aparat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index a6dbf2b8f1..67dbc709f6 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -4,10 +4,10 @@ int_or_none, 
merge_dicts, mimetype2ext, - traverse_obj, url_or_none, urljoin, ) +from ..utils.traversal import traverse_obj class AparatIE(InfoExtractor): From d810fff277fa1e969229f7119ad58aedeaab5df0 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:11:55 +0000 Subject: [PATCH 5/5] fix code style; fix channel follower count; fix test --- yt_dlp/extractor/aparat.py | 60 +++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 67dbc709f6..45972f6a77 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -1,12 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - get_element_by_id, - int_or_none, - merge_dicts, - mimetype2ext, - url_or_none, - urljoin, -) +from ..utils import get_element_by_id, int_or_none, merge_dicts, mimetype2ext, str_or_none, url_or_none from ..utils.traversal import traverse_obj @@ -26,6 +19,8 @@ class AparatIE(InfoExtractor): 'timestamp': 1387394859, 'upload_date': '20131218', 'view_count': int, + 'thumbnail': r're:https://static\.cdn\.asset\.aparat\.cloud/.+', + 'like_count': int, }, }, { # multiple formats @@ -133,24 +128,35 @@ class AparatPlaylistIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) info = self._download_json( - f'https://www.aparat.com/api/fa/v1/video/playlist/one/playlist_id/{playlist_id}', playlist_id) + f'https://www.aparat.com/api/fa/v1/video/playlist/one/playlist_id/{playlist_id}', + playlist_id, note='Getting playlist info', errnote='Failed to get playlist info') - info_dict = traverse_obj(info, ('data', 'attributes', { - 'playlist_title': ('title'), - 'description': ('description'), - }), default={}) - info_dict.update(thumbnails=traverse_obj([ - traverse_obj(info, ('data', 'attributes', {'url': ('big_poster', {url_or_none})})), - traverse_obj(info, ('data', 'attributes', {'url': ('small_poster', 
{url_or_none})})), - ], (...), default=[])) - info_dict.update(**traverse_obj(info, ('included', lambda _, v: v['type'] == 'channel', 'attributes', { - 'channel': ('username'), - 'channel_id': ('id'), - 'channel_url': ('link', filter, {urljoin(base=url)}), # starts with a slash - 'channel_follower_count': ('follower_cnt', {int_or_none}), - }), get_all=False)) + info_dict = { + **traverse_obj(info, ('data', 'attributes', { + 'title': 'title', + 'description': 'description', + 'thumbnails': (('big_poster', 'small_poster'), all, ..., {url_or_none}, {lambda x: {'url': x}}), + }), default={}), + 'id': playlist_id, + 'entries': traverse_obj(info, ( + 'included', lambda _, v: v['type'] == 'Video', 'attributes', 'uid', + {lambda x: self.url_result(f'https://www.aparat.com/v/{x}?playlist={playlist_id}')}, + ), default=[]), + } - return self.playlist_result(traverse_obj(info, ( - 'included', lambda _, v: v['type'] == 'Video', 'attributes', 'uid', - {lambda uid: self.url_result(f'https://www.aparat.com/v/{uid}?playlist={playlist_id}')}, - ), default=[]), playlist_id, **info_dict) + if username := traverse_obj( + info, ('included', lambda _, v: v['type'] == 'channel', 'attributes', 'username'), get_all=False): + user_info = self._download_json( + f'https://www.aparat.com/api/fa/v1/user/user/information/username/{username}', playlist_id, + fatal=False, note=f'Getting channel info ({username})', errnote=f'Failed to get channel info ({username})', + ) + info_dict.update({ + **traverse_obj(user_info, ('data', 'attributes', { + 'channel_id': ('id', {str_or_none}), + 'channel_follower_count': ('follower_cnt_num', {int_or_none}), + })), + 'channel': username, + 'channel_url': f'https://www.aparat.com/{username}', + }) + + return self.playlist_result(**info_dict)