[ie/Floatplane] Improve metadata extraction (#8934)

Authored by: chtk
This commit is contained in:
chtk 2024-01-22 06:57:52 +01:00 committed by GitHub
parent f0e8bc7c60
commit 9cd9044790
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -11,6 +11,7 @@
join_nonempty, join_nonempty,
parse_codecs, parse_codecs,
parse_iso8601, parse_iso8601,
url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
@ -108,6 +109,64 @@ class FloatplaneIE(InfoExtractor):
'availability': 'subscriber_only', 'availability': 'subscriber_only',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.floatplane.com/post/65B5PNoBtf',
'info_dict': {
'id': '65B5PNoBtf',
'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
'display_id': '65B5PNoBtf',
'like_count': int,
'release_timestamp': 1701249480,
'uploader': 'The Trash Network',
'availability': 'subscriber_only',
'uploader_id': '61bc20c9a131fb692bf2a513',
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
'comment_count': int,
'title': 'The $50 electronic drum kit.',
'channel_id': '64424fe73cd58cbcf8d8e131',
'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
'dislike_count': int,
'channel': 'The Drum Thing',
'release_date': '20231129',
},
'playlist_count': 2,
'playlist': [{
'info_dict': {
'id': 'ISPJjexylS',
'ext': 'mp4',
'release_date': '20231129',
'release_timestamp': 1701249480,
'title': 'The $50 electronic drum kit. .mov',
'channel_id': '64424fe73cd58cbcf8d8e131',
'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
'availability': 'subscriber_only',
'uploader': 'The Trash Network',
'duration': 622,
'channel': 'The Drum Thing',
'uploader_id': '61bc20c9a131fb692bf2a513',
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
},
}, {
'info_dict': {
'id': 'qKfxu6fEpu',
'ext': 'aac',
'release_date': '20231129',
'release_timestamp': 1701249480,
'title': 'Roland TD-7 Demo.m4a',
'channel_id': '64424fe73cd58cbcf8d8e131',
'availability': 'subscriber_only',
'uploader': 'The Trash Network',
'duration': 114,
'channel': 'The Drum Thing',
'uploader_id': '61bc20c9a131fb692bf2a513',
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
},
}],
'skip': 'requires subscription: "The Trash Network"',
'params': {'skip_download': 'm3u8'},
}] }]
def _real_initialize(self): def _real_initialize(self):
@ -124,6 +183,22 @@ def _real_extract(self, url):
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
raise ExtractorError('Post does not contain a video or audio track', expected=True) raise ExtractorError('Post does not contain a video or audio track', expected=True)
uploader_url = format_field(
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
common_info = {
'uploader_url': uploader_url,
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
'availability': self._availability(needs_subscription=True),
**traverse_obj(post_data, {
'uploader': ('creator', 'title', {str}),
'uploader_id': ('creator', 'id', {str}),
'channel': ('channel', 'title', {str}),
'channel_id': ('channel', 'id', {str}),
'release_timestamp': ('releaseDate', {parse_iso8601}),
}),
}
items = [] items = []
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
media_id = media['id'] media_id = media['id']
@ -150,11 +225,11 @@ def format_path(params):
formats = [] formats = []
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
url = urljoin(stream['cdn'], format_path(traverse_obj( url = urljoin(stream['cdn'], format_path(traverse_obj(
stream, ('resource', 'data', 'qualityLevelParams', quality['name'])))) stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
formats.append({ formats.append({
**traverse_obj(quality, { **traverse_obj(quality, {
'format_id': 'name', 'format_id': ('name', {str}),
'format_note': 'label', 'format_note': ('label', {str}),
'width': ('width', {int}), 'width': ('width', {int}),
'height': ('height', {int}), 'height': ('height', {int}),
}), }),
@ -164,38 +239,28 @@ def format_path(params):
}) })
items.append({ items.append({
**common_info,
'id': media_id, 'id': media_id,
**traverse_obj(metadata, { **traverse_obj(metadata, {
'title': 'title', 'title': ('title', {str}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'thumbnail': ('thumbnail', 'path'), 'thumbnail': ('thumbnail', 'path', {url_or_none}),
}), }),
'formats': formats, 'formats': formats,
}) })
uploader_url = format_field(
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))
post_info = { post_info = {
**common_info,
'id': post_id, 'id': post_id,
'display_id': post_id, 'display_id': post_id,
**traverse_obj(post_data, { **traverse_obj(post_data, {
'title': 'title', 'title': ('title', {str}),
'description': ('text', {clean_html}), 'description': ('text', {clean_html}),
'uploader': ('creator', 'title'),
'uploader_id': ('creator', 'id'),
'channel': ('channel', 'title'),
'channel_id': ('channel', 'id'),
'like_count': ('likes', {int_or_none}), 'like_count': ('likes', {int_or_none}),
'dislike_count': ('dislikes', {int_or_none}), 'dislike_count': ('dislikes', {int_or_none}),
'comment_count': ('comments', {int_or_none}), 'comment_count': ('comments', {int_or_none}),
'release_timestamp': ('releaseDate', {parse_iso8601}), 'thumbnail': ('thumbnail', 'path', {url_or_none}),
'thumbnail': ('thumbnail', 'path'),
}), }),
'uploader_url': uploader_url,
'channel_url': channel_url,
'availability': self._availability(needs_subscription=True),
} }
if len(items) > 1: if len(items) > 1: