mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-23 11:31:29 +01:00
Merge 311bb3b02a
into 52c0ffe40a
This commit is contained in:
commit
ddf4b3e011
|
@ -478,6 +478,7 @@
|
|||
from .dailywire import (
|
||||
DailyWireIE,
|
||||
DailyWirePodcastIE,
|
||||
DailyWireShowIE,
|
||||
)
|
||||
from .damtomo import (
|
||||
DamtomoRecordIE,
|
||||
|
|
|
@ -1,28 +1,72 @@
|
|||
import itertools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DailyWireBaseIE(InfoExtractor):
|
||||
_JSON_PATH = {
|
||||
'episode': ('props', 'pageProps', 'episodeData', 'episode'),
|
||||
'videos': ('props', 'pageProps', 'videoData', 'video'),
|
||||
'podcasts': ('props', 'pageProps', 'episode'),
|
||||
_GRAPHQL_API = 'https://v2server.dailywire.com/app/graphql'
|
||||
_GRAPHQL_QUERIES = {
|
||||
'getClipBySlug': 'query getClipBySlug($slug:String!){clip(where:{slug:$slug}){id,name,slug,description,image,show{id,name,slug},thumbnail,duration,createdBy{firstName,lastName},createdAt,videoURL}}',
|
||||
'getEpisodeBySlug': 'query getEpisodeBySlug($slug:String!){episode(where:{slug:$slug}){id,title,slug,description,createdAt,image,show{id,name,slug},segments{audio,video,duration,},createdBy{firstName,lastName}}}',
|
||||
'getPodcastEpisodes': 'query getPodcastEpisodes($where: PodcastEpisodeWhereInput, $orderBy: PodcastEpisodeOrderBy, $skip: Int, $first: Int) {listPodcastEpisode(where: $where, orderBy: $orderBy, skip: $skip, first: $first) {...ResPodcastEpisode}}, fragment ResPodcastEpisode on getPodcastEpisodeRes {id,title,description,slug,thumbnail,createdAt,audio,duration,podcast {id,name,slug,author {firstName,lastName}},season {id,name,slug}}',
|
||||
'getSeasonEpisodes': 'query getSeasonEpisodes($where:getSeasonEpisodesInput!,$first:Int,$skip:Int){getSeasonEpisodes(where:$where,first:$first,skip:$skip){episode{slug}}}',
|
||||
'getShowBySlug': 'query getShowBySlug($slug:String!){show(where:{slug:$slug}){id,name,description,image,seasons(orderBy:weight_DESC){id,name,slug}}}',
|
||||
'getVideoBySlug': 'query getVideoBySlug($slug:String!){video(where:{slug:$slug}){id,name,slug,description,image,thumbnail,videoURL,duration,createdBy{firstName,lastName},createdAt}}',
|
||||
}
|
||||
_GRAPHQL_VIDEO_QUERIES = {
|
||||
'clips': 'getClipBySlug',
|
||||
'episode': 'getEpisodeBySlug',
|
||||
'videos': 'getVideoBySlug',
|
||||
}
|
||||
_GRAPHQL_JSON_PATH = {
|
||||
'getClipBySlug': ('data', 'clip'),
|
||||
'getEpisodeBySlug': ('data', 'episode'),
|
||||
'getPodcastEpisodes': ('data', 'listPodcastEpisode'),
|
||||
'getSeasonEpisodes': ('data', 'getSeasonEpisodes', ..., 'episode', 'slug'),
|
||||
'getShowBySlug': ('data', 'show'),
|
||||
'getVideoBySlug': ('data', 'video'),
|
||||
}
|
||||
_API_HEADERS = {
|
||||
'Apollographql-Client-Name': 'DW_WEBSITE',
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'https://www.dailywire.com',
|
||||
'Referer': 'https://www.dailywire.com/',
|
||||
}
|
||||
|
||||
def _get_json(self, url):
|
||||
sites_type, slug = self._match_valid_url(url).group('sites_type', 'id')
|
||||
json_data = self._search_nextjs_data(self._download_webpage(url, slug), slug)
|
||||
return slug, traverse_obj(json_data, self._JSON_PATH[sites_type])
|
||||
def _real_initialize(self):
|
||||
if access_token := self._get_cookies('https://www.dailywire.com').get('accessToken'):
|
||||
self._API_HEADERS['Authorization'] = f'Bearer {access_token.value}'
|
||||
|
||||
def _call_api(self, slug, query, variables, message='Downloading JSON from GraphQL API'):
|
||||
json_data = self._download_json(
|
||||
self._GRAPHQL_API, slug, message, data=json.dumps(
|
||||
{'query': self._GRAPHQL_QUERIES[query], 'variables': variables}).encode(),
|
||||
headers=self._API_HEADERS)
|
||||
|
||||
return traverse_obj(json_data, self._GRAPHQL_JSON_PATH.get(query, ()))
|
||||
|
||||
def _paginate(self, slug, query, where):
|
||||
for i in itertools.count(0):
|
||||
page = self._call_api(
|
||||
slug, query, {'where': where, 'first': 10, 'skip': i * 10},
|
||||
message=f'Downloading page {i + 1}')
|
||||
if not page:
|
||||
break
|
||||
yield page
|
||||
|
||||
|
||||
class DailyWireIE(DailyWireBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos)/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos|clips)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailywire.com/episode/1-fauci',
|
||||
'info_dict': {
|
||||
|
@ -32,32 +76,79 @@ class DailyWireIE(DailyWireBaseIE):
|
|||
'title': '1. Fauci',
|
||||
'description': 'md5:9df630347ef85081b7e97dd30bc22853',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/episodes/ckzsl50xnqpy30850in3v4bu7/ckzsl50xnqpy30850in3v4bu7-1648237399554.jpg',
|
||||
'creator': 'Caroline Roberts',
|
||||
'series_id': 'ckzplm0a097fn0826r2vc3j7h',
|
||||
'series': 'China: The Enemy Within',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailywire.com/episode/ep-124-bill-maher',
|
||||
'info_dict': {
|
||||
'id': 'cl0ngbaalplc80894sfdo9edf',
|
||||
'ext': 'mp3',
|
||||
'display_id': 'ep-124-bill-maher',
|
||||
'title': 'Ep. 124 - Bill Maher',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cl0ngbaalplc80894sfdo9edf/cl0ngbaalplc80894sfdo9edf-1647065568518.jpg',
|
||||
'creator': 'Caroline Roberts',
|
||||
'description': 'md5:adb0de584bcfa9c41374999d9e324e98',
|
||||
'series_id': 'cjzvep7270hp00786l9hwccob',
|
||||
'series': 'The Sunday Special',
|
||||
'upload_date': '20220218',
|
||||
'creators': ['Caroline Roberts'],
|
||||
'timestamp': 1645182003,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailywire.com/videos/the-hyperions',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'skip': 'premium only',
|
||||
'url': 'https://www.dailywire.com/episode/ep-3-avery-s-niece-new',
|
||||
'info_dict': {
|
||||
'id': 'clm8geguv3qku0870ewvcu0ed',
|
||||
'display_id': 'ep-3-avery-s-niece-new',
|
||||
'title': 'Ep 3 - Avery’s Niece',
|
||||
'description': 'md5:861ab336bd2bab2abebc25a1479a42e0',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/episodes/clm8geguv3qku0870ewvcu0ed/clm8geguv3qku0870ewvcu0ed-1694047935734.png',
|
||||
'series_id': 'clim20ue5f8160838ecz7ba8q',
|
||||
'ext': 'mp4',
|
||||
'subtitles': {'en-US': [{'ext': 'vtt'}]},
|
||||
'timestamp': 1694062826,
|
||||
'series': 'Convicting a Murderer',
|
||||
'creators': ['Scott Bowler '],
|
||||
'upload_date': '20230907',
|
||||
},
|
||||
}, {
|
||||
'skip': 'premium only',
|
||||
'url': 'https://www.dailywire.com/clips/the-making-of-run-hide-fight',
|
||||
'info_dict': {
|
||||
'id': 'ckjutyd6810dd0806ivcq2526',
|
||||
'display_id': 'the-making-of-run-hide-fight',
|
||||
'title': 'The Making of Run Hide Fight',
|
||||
'description': 'md5:085297d753b73ad87bdd8b050cc10d2c',
|
||||
'thumbnail': 'https://image.media.dailywire.com/K7OqsPwWH5c9hpWT68CHeZ4vRUtoz5Le/thumbnail.png',
|
||||
'duration': 916.790889,
|
||||
'creators': ['Paul Snyder'],
|
||||
'upload_date': '20210113',
|
||||
'timestamp': 1610506443,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'skip': 'premium only',
|
||||
'url': 'https://www.dailywire.com/videos/choosing-death-the-legacy-of-roe',
|
||||
'info_dict': {
|
||||
'id': 'cl3260dva6pjr097819zw506s',
|
||||
'display_id': 'choosing-death-the-legacy-of-roe',
|
||||
'title': 'Choosing Death [The Legacy of Roe]',
|
||||
'description': 'md5:b07597f0ef32130365427a05fd1ccd25',
|
||||
'duration': 2618.0738,
|
||||
'timestamp': 1652308821,
|
||||
'upload_date': '20220511',
|
||||
'thumbnail': 'https://image.media.dailywire.com/FBgIBgmq635VuqTgWKjcGviEjJ2vJ02Zz/thumbnail.png',
|
||||
'subtitles': {'en-US': [{'ext': 'vtt'}]},
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, episode_info = self._get_json(url)
|
||||
urls = traverse_obj(
|
||||
episode_info, (('segments', 'videoUrl'), ..., ('video', 'audio')), expected_type=url_or_none)
|
||||
sites_type, slug = self._match_valid_url(url).group('sites_type', 'id')
|
||||
episode_data = self._call_api(slug, self._GRAPHQL_VIDEO_QUERIES[sites_type], {'slug': slug})
|
||||
|
||||
if not episode_data:
|
||||
raise ExtractorError('video not found')
|
||||
|
||||
urls = traverse_obj(episode_data,
|
||||
(('segments', 'clips'), ..., ('video', 'audio')),
|
||||
) or [episode_data.get('videoURL')]
|
||||
|
||||
if 'Access Denied' in urls:
|
||||
self.report_warning(f'It looks like {slug} requires a login. Try passing cookies and try again.')
|
||||
|
||||
urls = [url_or_none(u) for u in urls if url_or_none(u)]
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for url in urls:
|
||||
|
@ -68,24 +159,27 @@ def _real_extract(self, url):
|
|||
formats.extend(format_)
|
||||
self._merge_subtitles(subs_, target=subtitles)
|
||||
return {
|
||||
'id': episode_info['id'],
|
||||
'id': episode_data.get('id'),
|
||||
'display_id': slug,
|
||||
'title': traverse_obj(episode_info, 'title', 'name'),
|
||||
'description': episode_info.get('description'),
|
||||
'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '),
|
||||
'duration': float_or_none(episode_info.get('duration')),
|
||||
'is_live': episode_info.get('isLive'),
|
||||
'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none),
|
||||
'title': traverse_obj(episode_data, 'title', 'name'),
|
||||
'description': episode_data.get('description'),
|
||||
'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'),
|
||||
from_dict=episode_data, delim=' '),
|
||||
'duration': float_or_none(episode_data.get('duration')),
|
||||
'timestamp': parse_iso8601(episode_data.get('createdAt')),
|
||||
'is_live': episode_data.get('isLive'),
|
||||
'thumbnail': traverse_obj(episode_data, 'thumbnail', 'image', expected_type=url_or_none),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series_id': traverse_obj(episode_info, ('show', 'id')),
|
||||
'series': traverse_obj(episode_info, ('show', 'name')),
|
||||
'series_id': traverse_obj(episode_data, ('show', 'id')),
|
||||
'series': traverse_obj(episode_data, ('show', 'name')),
|
||||
}
|
||||
|
||||
|
||||
class DailyWirePodcastIE(DailyWireBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+/(?P<id>[\w-]+))'
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+)/?(?P<id>[\w-]+)?'
|
||||
_TESTS = [{
|
||||
'note': 'serves shorter ad-free stream with paid cookies',
|
||||
'url': 'https://www.dailywire.com/podcasts/morning-wire/get-ready-for-recession-6-15-22',
|
||||
'info_dict': {
|
||||
'id': 'cl4f01d0w8pbe0a98ydd0cfn1',
|
||||
|
@ -93,21 +187,118 @@ class DailyWirePodcastIE(DailyWireBaseIE):
|
|||
'display_id': 'get-ready-for-recession-6-15-22',
|
||||
'title': 'Get Ready for Recession | 6.15.22',
|
||||
'description': 'md5:c4afbadda4e1c38a4496f6d62be55634',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1667859984424.jpg',
|
||||
'duration': 900.117667,
|
||||
'timestamp': 1655261631,
|
||||
'season_id': 'morning-wire-morning-wire-podcast-season',
|
||||
'series_id': 'morning-wire',
|
||||
'creators': ['Georgia Howe'],
|
||||
'season': '2022',
|
||||
'series': 'Morning Wire',
|
||||
'upload_date': '20220615',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailywire.com/podcasts/enough',
|
||||
'info_dict': {
|
||||
'id': 'ckx4kvm8710i80869lvuu1b8z',
|
||||
'title': 'Enough',
|
||||
'display_id': 'enough',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, episode_info = self._get_json(url)
|
||||
audio_id = traverse_obj(episode_info, 'audioMuxPlaybackId', 'VUsAipTrBVSgzw73SpC2DAJD401TYYwEp')
|
||||
podcaster, slug = self._match_valid_url(url).group('podcaster', 'id')
|
||||
|
||||
return {
|
||||
'id': episode_info['id'],
|
||||
'url': f'https://stream.media.dailywire.com/{audio_id}/audio.m4a',
|
||||
'display_id': slug,
|
||||
'title': episode_info.get('title'),
|
||||
'duration': float_or_none(episode_info.get('duration')),
|
||||
'thumbnail': episode_info.get('thumbnail'),
|
||||
'description': episode_info.get('description'),
|
||||
}
|
||||
def _extract_pod_ep_info(episode_data):
|
||||
return {
|
||||
'id': episode_data.get('id'),
|
||||
'url': episode_data.get('audio'),
|
||||
'display_id': episode_data.get('slug'),
|
||||
'title': episode_data.get('title'),
|
||||
'duration': float_or_none(episode_data.get('duration')),
|
||||
'timestamp': parse_iso8601(episode_data.get('createdAt')),
|
||||
'thumbnail': episode_data.get('thumbnail'),
|
||||
'description': episode_data.get('description'),
|
||||
'creator': join_nonempty(('podcast', 'author', 'firstName'),
|
||||
('podcast', 'author', 'lastName'),
|
||||
from_dict=episode_data, delim=' '),
|
||||
'season': traverse_obj(episode_data, ('season', 'name')),
|
||||
'season_id': traverse_obj(episode_data, ('season', 'slug')),
|
||||
'series': traverse_obj(episode_data, ('podcast', 'name')),
|
||||
'series_id': traverse_obj(episode_data, ('podcast', 'slug')),
|
||||
}
|
||||
|
||||
if slug:
|
||||
episodes = self._call_api(slug, 'getPodcastEpisodes', {'where': {'slug': slug}})
|
||||
if episode_data := traverse_obj(episodes, ..., get_all=False):
|
||||
return _extract_pod_ep_info(episode_data)
|
||||
else:
|
||||
episodes = [
|
||||
episode for page in
|
||||
self._paginate(podcaster, 'getPodcastEpisodes', {'podcast': {'slug': podcaster}})
|
||||
for episode in page
|
||||
]
|
||||
|
||||
if episodes:
|
||||
podcast_data = traverse_obj(episodes, (..., 'podcast'), {}, get_all=False)
|
||||
return self.playlist_result(
|
||||
[_extract_pod_ep_info(e) for e in episodes],
|
||||
podcast_data.get('id'), podcast_data.get('name'), podcast_data.get('description'),
|
||||
display_id=podcast_data.get('slug'), thumbnail=podcast_data.get('coverImage'))
|
||||
|
||||
raise ExtractorError('Podcast not found')
|
||||
|
||||
|
||||
class DailyWireShowIE(DailyWireBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>show)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'skip': 'premium only',
|
||||
'url': 'https://www.dailywire.com/show/apollo-11-what-we-saw',
|
||||
'playlist_mincount': 28,
|
||||
'info_dict': {
|
||||
'id': 'ckixsvamonvl40862ysxve50i',
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/shows/ckixsvamonvl40862ysxve50i-1679082975554.jpg',
|
||||
'title': 'What We Saw',
|
||||
'description': 'md5:98d2a7d5cc8175494a4ca611058ed440',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'cltf80tk79fxi0942c7h394b5',
|
||||
'season_id': 'what-we-saw-season-3-an-empire-of-terror-season',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'season-3-an-empire-of-terror',
|
||||
'series_id': 'ckixsvamonvl40862ysxve50i',
|
||||
'title': 'Season 3: An Empire of Terror',
|
||||
'description': 'What We Saw: An Empire of Terror premieres on March 6, 2024.',
|
||||
'creators': ['Scott Bowler '],
|
||||
'upload_date': '20240306',
|
||||
'timestamp': 1709704832,
|
||||
'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cltf80tk79fxi0942c7h394b5/cltf80tk79fxi0942c7h394b5-1709694601671.png',
|
||||
'series': 'What We Saw',
|
||||
}}],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug = self._match_valid_url(url).group('id')
|
||||
|
||||
show_data = self._call_api(slug, 'getShowBySlug', {'slug': slug})
|
||||
if not show_data:
|
||||
raise ExtractorError('Show not found')
|
||||
|
||||
for season_data in show_data.get('seasons', []):
|
||||
season_data['episodes'] = [
|
||||
episode for page in
|
||||
self._paginate(season_data.get('slug'), 'getSeasonEpisodes', {'season': {'id': season_data.get('id')}})
|
||||
for episode in page
|
||||
]
|
||||
|
||||
return self.playlist_result(
|
||||
[self.url_result(f'https://www.dailywire.com/episode/{episode_slug}',
|
||||
season_id=season_data.get('slug'), season=season_data.get('title'), url_transparent=True)
|
||||
for season_data in show_data.get('seasons', []) for episode_slug in season_data['episodes']],
|
||||
show_data.get('id'), show_data.get('name'), show_data.get('description'),
|
||||
thumbnail=show_data.get('image'))
|
||||
|
|
Loading…
Reference in New Issue
Block a user