diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index c0032fcab9..050c252e3f 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -7,6 +7,7 @@ compat_urllib_parse_urlparse, ) from ..utils import ( + format_field, float_or_none, int_or_none, parse_iso8601, @@ -92,7 +93,7 @@ def _real_extract(self, url): 'timestamp': parse_iso8601(video.get('creationTime')), 'channel': channel.get('name'), 'channel_id': channel_id, - 'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None, + 'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'), 'duration': float_or_none(video.get('duration'), 1000), 'view_count': int_or_none(video.get('views')), 'tags': video.get('hashtags'), diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index b5d1b57af2..f5e559c9f4 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -9,6 +9,7 @@ compat_str, ) from ..utils import ( + format_field, int_or_none, parse_iso8601, smuggle_url, @@ -43,7 +44,7 @@ def _parse_video_data(self, video_data, video_id, is_live): 'id': video_id, 'title': title, 'description': video_data.get('description_en') or video_data.get('description_ar'), - 'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None, + 'thumbnail': format_field(img, template='http://admin.mangomolo.com/analytics/%s'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': parse_iso8601(video_data.get('create_time'), ' '), 'is_live': is_live, diff --git a/yt_dlp/extractor/carambatv.py b/yt_dlp/extractor/carambatv.py index b57b86af7e..7e5cc90fb5 100644 --- a/yt_dlp/extractor/carambatv.py +++ b/yt_dlp/extractor/carambatv.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + format_field, float_or_none, int_or_none, try_get, @@ -43,7 +44,7 @@ def _real_extract(self, url): formats = [{ 'url': base_url + f['fn'], 'height': int_or_none(f.get('height')), - 'format_id': '%sp' % f['height'] if f.get('height') else None, + 'format_id': format_field(f, 'height', '%sp'), } for f in video['qualities'] if f.get('fn')] self._sort_formats(formats) diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py index 6c82fae3c4..2ed6c2bdc6 100644 --- a/yt_dlp/extractor/flickr.py +++ b/yt_dlp/extractor/flickr.py @@ -7,6 +7,7 @@ ) from ..utils import ( ExtractorError, + format_field, int_or_none, qualities, ) @@ -95,7 +96,7 @@ def _real_extract(self, url): owner = video_info.get('owner', {}) uploader_id = owner.get('nsid') uploader_path = owner.get('path_alias') or uploader_id - uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None + uploader_url = format_field(uploader_path, template='https://www.flickr.com/photos/%s/') return { 'id': video_id, diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py index 7f2f6f3e1d..a13e528f52 100644 --- a/yt_dlp/extractor/gamejolt.py +++ b/yt_dlp/extractor/gamejolt.py @@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse_unquote from ..utils import ( determine_ext, + format_field, int_or_none, str_or_none, traverse_obj, @@ -86,7 +87,7 @@ def _parse_post(self, post_data): 'display_id': post_data.get('slug'), 'uploader': user_data.get('display_name') or user_data.get('name'), 'uploader_id': user_data.get('username'), - 'uploader_url': 'https://gamejolt.com' + user_data['url'] if user_data.get('url') else None, + 'uploader_url': format_field(user_data, 'url', 'https://gamejolt.com%s'), 'categories': [try_get(category, lambda x: '%s - %s' % (x['community']['name'], x['channel'].get('display_title') or x['channel']['title'])) for category in post_data.get('communities' or [])], 'tags': traverse_obj( diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 68288495ca..645e4dff11 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -12,6 +12,7 @@ ) from ..utils import ( ExtractorError, + format_field, float_or_none, get_element_by_attribute, int_or_none, @@ -341,7 +342,7 @@ def _real_extract(self, url): if nodes: return self.playlist_result( self._extract_nodes(nodes, True), video_id, - 'Post by %s' % uploader_id if uploader_id else None, description) + format_field(uploader_id, template='Post by %s'), description) video_url = self._og_search_video_url(webpage, secure=False) diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py index 6376181835..7350f537c6 100644 --- a/yt_dlp/extractor/joj.py +++ b/yt_dlp/extractor/joj.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + format_field, int_or_none, js_to_json, try_get, @@ -72,7 +73,7 @@ def _real_extract(self, url): r'(\d+)[pP]\.', format_url, 'height', default=None) formats.append({ 'url': format_url, - 'format_id': '%sp' % height if height else None, + 'format_id': format_field(height, template='%sp'), 'height': int(height), }) if not formats: diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index c8f60ef455..c582164580 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -12,6 +12,7 @@ from ..utils import ( clean_html, ExtractorError, + format_field, int_or_none, unsmuggle_url, smuggle_url, @@ -372,6 +373,6 @@ def sign_url(unsigned_url): 'thumbnail': info.get('thumbnailUrl'), 'duration': info.get('duration'), 'timestamp': info.get('createdAt'), - 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None, + 'uploader_id': format_field(info, 'userId', ignore=('None', None)), 'view_count': info.get('plays'), } diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py index 027f43cf01..06dbcbb402 100644 --- a/yt_dlp/extractor/keezmovies.py +++ b/yt_dlp/extractor/keezmovies.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, ExtractorError, + format_field, int_or_none, str_to_int, strip_or_none, @@ -69,7 +70,7 @@ def extract_format(format_url, height=None): video_url, title, 32).decode('utf-8') formats.append({ 'url': format_url, - 'format_id': '%dp' % height if height else None, + 'format_id': format_field(height, template='%dp'), 'height': height, 'tbr': tbr, }) diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py index e1d5f21e17..24e04edb5e 100644 --- a/yt_dlp/extractor/line.py +++ b/yt_dlp/extractor/line.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + format_field, int_or_none, js_to_json, str_or_none, @@ -121,7 +122,7 @@ def _parse_broadcast_item(self, item): 'timestamp': int_or_none(item.get('createdAt')), 'channel': channel.get('name'), 'channel_id': channel_id, - 'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None, + 'channel_url': format_field(channel_id, template='https://live.line.me/channels/%s'), 'duration': int_or_none(item.get('archiveDuration')), 'view_count': int_or_none(item.get('viewerCount')), 'comment_count': int_or_none(item.get('chatCount')), diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py index 7da0b4284b..bd2dffac0c 100644 --- a/yt_dlp/extractor/lnkgo.py +++ b/yt_dlp/extractor/lnkgo.py @@ -73,14 +73,12 @@ def _real_extract(self, url): video_id, 'mp4', 'm3u8_native') self._sort_formats(formats) - poster_image = video_info.get('posterImage') - return { 'id': video_id, 'display_id': display_id, 'title': title, 'formats': formats, - 'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None, + 'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'), 'duration': int_or_none(video_info.get('duration')), 'description': clean_html(video_info.get('htmlDescription')), 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0), diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 2ece5aac4a..59cc30736c 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -7,6 +7,7 @@ from ..compat import compat_str from ..utils import ( ExtractorError, + format_field, float_or_none, int_or_none, str_or_none, @@ -118,7 +119,7 @@ def add_item(container, item_url, height, id_key='format_id', item_id=None): author = try_get( hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {} author_id = str_or_none(author.get('id')) - author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None + author_url = format_field(author_id, template='https://medal.tv/users/%s') return { 'id': video_id, diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py index 8e9f0f8254..9da07207ba 100644 --- a/yt_dlp/extractor/minds.py +++ b/yt_dlp/extractor/minds.py @@ -5,6 +5,7 @@ from ..compat import compat_str from ..utils import ( clean_html, + format_field, int_or_none, str_or_none, strip_or_none, @@ -120,7 +121,7 @@ def _real_extract(self, url): 'timestamp': int_or_none(entity.get('time_created')), 'uploader': strip_or_none(owner.get('name')), 'uploader_id': uploader_id, - 'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None, + 'uploader_url': format_field(uploader_id, template='https://www.minds.com/%s'), 'view_count': int_or_none(entity.get('play:count')), 'like_count': int_or_none(entity.get('thumbs:up:count')), 'dislike_count': int_or_none(entity.get('thumbs:down:count')), diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 1e22f24e3f..e0b2ab9822 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + format_field, int_or_none, parse_resolution, str_or_none, @@ -1386,8 +1387,7 @@ def _extract_playlist(self, host, type, id): playlist_timestamp = unified_timestamp(info.get('createdAt')) channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName') channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id') - thumbnail = info.get('thumbnailPath') - thumbnail = f'https://{host}{thumbnail}' if thumbnail else None + thumbnail = format_field(info, 'thumbnailPath', f'https://{host}%s') entries = OnDemandPagedList(functools.partial( self.fetch_page, host, id, type), self._PAGE_SIZE) diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index d712e0036f..17c8c91007 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -18,6 +18,7 @@ clean_html, determine_ext, ExtractorError, + format_field, int_or_none, merge_dicts, NO_DEFAULT, @@ -431,7 +432,7 @@ def add_format(format_url, height=None): default=None)) formats.append({ 'url': format_url, - 'format_id': '%dp' % height if height else None, + 'format_id': format_field(height, template='%dp'), 'height': height, }) diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py index 2de7ab04a9..dc98973056 100644 --- a/yt_dlp/extractor/radlive.py +++ b/yt_dlp/extractor/radlive.py @@ -1,6 +1,12 @@ import json -from ..utils import ExtractorError, traverse_obj, try_get, unified_timestamp +from ..utils import ( + ExtractorError, + format_field, + traverse_obj, + try_get, + unified_timestamp +) from .common import InfoExtractor @@ -74,7 +80,7 @@ def _real_extract(self, url): 'release_timestamp': release_date, 'channel': channel.get('name'), 'channel_id': channel_id, - 'channel_url': f'https://rad.live/content/channel/{channel_id}' if channel_id else None, + 'channel_url': format_field(channel_id, template='https://rad.live/content/channel/%s'), } if content_type == 'episode': diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index 9c698626fc..e18a59a49c 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -5,7 +5,7 @@ from .common import InfoExtractor from ..utils import ( - # HEADRequest, + format_field, int_or_none, OnDemandPagedList, smuggle_url, @@ -26,18 +26,6 @@ def _parse_video(self, video): r'https?://player\.vimeo\.com/external/(\d+)', video['vimeoVideoURL'], 'vimeo id') - # video_url = self._request_webpage( - # HEADRequest(video['vimeoVideoURL']), video_id).geturl() - # formats = [] - # for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]: - # formats.extend(self._extract_m3u8_formats( - # v_url, video_id, 'mp4', 'm3u8_native', - # m3u8_id='hls' + suffix, fatal=False)) - # formats.extend(self._extract_mpd_formats( - # v_url.replace('.m3u8', '.mpd'), video_id, - # mpd_id='dash' + suffix, fatal=False)) - # self._sort_formats(formats) - uploader_id = video.get('hostID') return { @@ -51,7 +39,6 @@ def _parse_video(self, video): 'Referer': 'https://storyfire.com/', } }), - # 'formats': formats, 'thumbnail': video.get('storyImage'), 'view_count': int_or_none(video.get('views')), 'like_count': int_or_none(video.get('likesCount')), @@ -60,7 +47,7 @@ def _parse_video(self, video): 'timestamp': int_or_none(video.get('publishDate')), 'uploader': video.get('username'), 'uploader_id': uploader_id, - 'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None, + 'uploader_url': format_field(uploader_id, template='https://storyfire.com/user/%s/video'), 'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')), } diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 580cb533b1..65ea13ddb2 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + format_field, int_or_none, str_or_none, try_get, @@ -28,7 +29,7 @@ def _extract_streamer_info(self, data): return { 'uploader': streamer_info.get('nickName'), 'uploader_id': str_or_none(streamer_info.get('uid')), - 'uploader_url': 'https://trovo.live/' + username if username else None, + 'uploader_url': format_field(username, template='https://trovo.live/%s'), } diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index f8a8d82e8e..8565a7c46b 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -13,6 +13,7 @@ from ..utils import ( dict_get, ExtractorError, + format_field, float_or_none, int_or_none, traverse_obj, @@ -469,7 +470,7 @@ def _real_extract(self, url): 'uploader': uploader, 'timestamp': unified_timestamp(status.get('created_at')), 'uploader_id': uploader_id, - 'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None, + 'uploader_url': format_field(uploader_id, template='https://twitter.com/%s'), 'like_count': int_or_none(status.get('favorite_count')), 'repost_count': int_or_none(status.get('retweet_count')), 'comment_count': int_or_none(status.get('reply_count')), diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 571448bf21..e99dbdefa1 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -6,6 +6,7 @@ from ..utils import ( clean_html, ExtractorError, + format_field, get_element_by_class, int_or_none, parse_iso8601, @@ -160,7 +161,7 @@ def _real_extract(self, url): 'uploader': user.get('name'), 'timestamp': parse_iso8601(video.get('created_at')), 'uploader_id': username, - 'uploader_url': 'https://www.vidio.com/@' + username if username else None, + 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'), 'channel': channel.get('name'), 'channel_id': str_or_none(channel.get('id')), 'view_count': get_count('view_count'), @@ -291,5 +292,5 @@ def _real_extract(self, url): 'uploader': user.get('name'), 'timestamp': parse_iso8601(stream_meta.get('start_time')), 'uploader_id': username, - 'uploader_url': 'https://www.vidio.com/@' + username if username else None, + 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'), } diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index ce7487ec16..a63919ff24 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( HEADRequest, + format_field, float_or_none, get_element_by_id, int_or_none, @@ -102,7 +103,7 @@ def _real_extract(self, url): uploader = self._search_regex( r']+class=["\']wt_person[^>]+>\s*]+\bhref=["\']/user/[^>]+>([^<]+)', webpage, 'uploader', fatal=False) - uploader_url = 'https://www.vidlii.com/user/%s' % uploader if uploader else None + uploader_url = format_field(uploader, template='https://www.vidlii.com/user/%s') upload_date = unified_strdate(self._html_search_meta( 'datePublished', webpage, default=None) or self._search_regex( diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py index 07fce0daa4..e59b1037b0 100644 --- a/yt_dlp/extractor/vine.py +++ b/yt_dlp/extractor/vine.py @@ -6,6 +6,7 @@ from ..compat import compat_str from ..utils import ( determine_ext, + format_field, int_or_none, unified_timestamp, ) @@ -92,7 +93,7 @@ def video_url(kind): username = data.get('username') - alt_title = 'Vine by %s' % username if username else None + alt_title = format_field(username, template='Vine by %s') return { 'id': video_id, diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py index 128faa30df..583aea38d9 100644 --- a/yt_dlp/extractor/younow.py +++ b/yt_dlp/extractor/younow.py @@ -7,6 +7,7 @@ from ..compat import compat_str from ..utils import ( ExtractorError, + format_field, int_or_none, try_get, ) @@ -93,7 +94,7 @@ def _extract_moment(item, fatal=True): uploader = try_get(item, lambda x: x['owner']['name'], compat_str) uploader_id = try_get(item, lambda x: x['owner']['userId']) - uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None + uploader_url = format_field(uploader, template='https://www.younow.com/%s') entry = { 'extractor_key': 'YouNowMoment', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 09a0b22799..d8a63a3d2d 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3331,7 +3331,7 @@ def feed_entry(name): 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None, 'uploader_url': owner_profile_url, 'channel_id': channel_id, - 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None, + 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'), 'duration': duration, 'view_count': int_or_none( get_first((video_details, microformats), (..., 'viewCount')) diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index d1ed55be35..278a9438e8 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import float_or_none, int_or_none +from ..utils import format_field, float_or_none, int_or_none class ZhihuIE(InfoExtractor): @@ -61,7 +61,7 @@ def _real_extract(self, url): 'uploader': author.get('name'), 'timestamp': int_or_none(zvideo.get('published_at')), 'uploader_id': author.get('id'), - 'uploader_url': 'https://www.zhihu.com/people/' + url_token if url_token else None, + 'uploader_url': format_field(url_token, template='https://www.zhihu.com/people/%s'), 'duration': float_or_none(video.get('duration')), 'view_count': int_or_none(zvideo.get('play_count')), 'like_count': int_or_none(zvideo.get('liked_count')), diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index cb44a9c519..55bd67ea2f 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4975,13 +4975,10 @@ def to_high_limit_path(path): def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None): - if field is None: - val = obj if obj is not None else default - else: - val = obj.get(field, default) - if func and val not in ignore: - val = func(val) - return template % val if val not in ignore else default + val = traverse_obj(obj, *variadic(field)) + if val in ignore: + return default + return template % (func(val) if func else val) def clean_podcast_url(url):