mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 07:28:19 +01:00
Compare commits
10 Commits
849d9c3686
...
4f24c1fbf2
Author | SHA1 | Date | |
---|---|---|---|
|
4f24c1fbf2 | ||
|
4b5eec0aaa | ||
|
560bcb5291 | ||
|
44c8cdb728 | ||
|
19003f882b | ||
|
6f1db75869 | ||
|
4f31126f07 | ||
|
e1b623cea1 | ||
|
59cdcf7795 | ||
|
a03cd32b71 |
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
|
@ -8,6 +9,7 @@
|
||||||
float_or_none,
|
float_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
unescapeHTML,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -19,7 +21,7 @@
|
||||||
class CeskaTelevizeIE(InfoExtractor):
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
|
'url': 'https://www.ceskatelevize.cz/porady/10441294653-hyde-park-civilizace/bonus/20641/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '61924494877028507',
|
'id': '61924494877028507',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -27,6 +29,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
'description': 'English Subtittles',
|
'description': 'English Subtittles',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 81.3,
|
'duration': 81.3,
|
||||||
|
'live_status': 'not_live',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -34,13 +37,16 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# live stream
|
# live stream
|
||||||
'url': 'http://www.ceskatelevize.cz/zive/ct1/',
|
'url': 'https://www.ceskatelevize.cz/zive/ct1/',
|
||||||
|
'only_matching': True,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '102',
|
'id': '61924494878124436',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r'ČT1 - živé vysílání online',
|
'title': r're:^ČT1 - živé vysílání online \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
|
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
|
||||||
'is_live': True,
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 5373.3,
|
||||||
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -48,18 +54,19 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# another
|
# another
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
'url': 'https://www.ceskatelevize.cz/zive/sport/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '402',
|
'id': '422',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
'is_live': True,
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
# 'skip': 'Georestricted to Czech Republic',
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
}, {
|
||||||
# video with 18+ caution trailer
|
# video with 18+ caution trailer
|
||||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||||
|
@ -74,6 +81,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bogotart - Queer (Varování 18+)',
|
'title': 'Bogotart - Queer (Varování 18+)',
|
||||||
'duration': 11.9,
|
'duration': 11.9,
|
||||||
|
'live_status': 'not_live',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -82,6 +90,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
'title': 'Bogotart - Queer (Queer)',
|
'title': 'Bogotart - Queer (Queer)',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 1558.3,
|
'duration': 1558.3,
|
||||||
|
'live_status': 'not_live',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -91,7 +100,19 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
# iframe embed
|
# iframe embed
|
||||||
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': '61924494877628660',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Epizoda 1/13 - Neviditelní',
|
||||||
|
'description': 'Vypadají jako my, mluví jako my, ale mají něco navíc – gen, který jim umožňuje dýchat vodu. Aniž to tušíme, žijí mezi námi.',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 3576.8,
|
||||||
|
'live_status': 'not_live',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -106,26 +127,83 @@ def _real_extract(self, url):
|
||||||
if playlist_description:
|
if playlist_description:
|
||||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||||
|
|
||||||
type_ = 'IDEC'
|
type_ = 'episode'
|
||||||
|
is_live = False
|
||||||
if re.search(r'(^/porady|/zive)/', parsed_url.path):
|
if re.search(r'(^/porady|/zive)/', parsed_url.path):
|
||||||
next_data = self._search_nextjs_data(webpage, playlist_id)
|
next_data = self._search_nextjs_data(webpage, playlist_id)
|
||||||
if '/zive/' in parsed_url.path:
|
if '/zive/' in parsed_url.path:
|
||||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
|
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
|
||||||
|
sidp = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'showId'), get_all=False)
|
||||||
|
is_live = True
|
||||||
else:
|
else:
|
||||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
|
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
|
||||||
if not idec:
|
if not idec:
|
||||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
|
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
|
||||||
if idec:
|
if idec:
|
||||||
type_ = 'bonus'
|
type_ = 'bonus'
|
||||||
|
sidp = self._search_regex(r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/([0-9]+)-', url, playlist_id, default=playlist_id)
|
||||||
if not idec:
|
if not idec:
|
||||||
raise ExtractorError('Failed to find IDEC id')
|
raise ExtractorError('Failed to find IDEC id')
|
||||||
iframe_hash = self._download_webpage(
|
sidp = sidp.rsplit('-')[0]
|
||||||
'https://www.ceskatelevize.cz/v-api/iframe-hash/',
|
query = {'origin': 'iVysilani', 'autoStart': 'true', 'sidp': sidp, type_: idec}
|
||||||
playlist_id, note='Getting IFRAME hash')
|
|
||||||
query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec}
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
|
'https://player.ceskatelevize.cz/',
|
||||||
playlist_id, note='Downloading player', query=query)
|
playlist_id, note='Downloading player', query=query)
|
||||||
|
playlistpage_url = 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/'
|
||||||
|
data = {
|
||||||
|
'playlist[0][type]': type_,
|
||||||
|
'playlist[0][id]': idec,
|
||||||
|
'requestUrl': parsed_url.path,
|
||||||
|
'requestSource': 'iVysilani',
|
||||||
|
}
|
||||||
|
elif parsed_url.path == '/' and parsed_url.fragment == 'live':
|
||||||
|
if self._search_regex(r'(?s)<section[^>]+id=[\'"]live[\'"][^>]+data-ctcomp-data=\'([^\']+)\'[^>]*>', webpage, 'live video player', default=None):
|
||||||
|
# CT4
|
||||||
|
is_live = True
|
||||||
|
ctcomp_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)<section[^>]+id=[\'"]live[\'"][^>]+data-ctcomp-data=\'([^\']+)\'[^>]*>',
|
||||||
|
webpage, 'ctcomp data', fatal=True),
|
||||||
|
playlist_id, transform_source=unescapeHTML)
|
||||||
|
current_item = traverse_obj(ctcomp_data, ('items', ctcomp_data.get('currentItem'), 'items', 0, 'video', 'data', 'source', 'playlist', 0))
|
||||||
|
playlistpage_url = 'https://playlist.ceskatelevize.cz/'
|
||||||
|
data = {
|
||||||
|
'contentType': 'live',
|
||||||
|
'items': [{
|
||||||
|
'id': current_item.get('id'),
|
||||||
|
'key': current_item.get('key'),
|
||||||
|
'assetId': current_item.get('assetId'),
|
||||||
|
'playerType': 'dash',
|
||||||
|
'date': current_item.get('date'),
|
||||||
|
'requestSource': current_item.get('requestSource'),
|
||||||
|
'drm': current_item.get('drm'),
|
||||||
|
'quality': current_item.get('quality'),
|
||||||
|
}],
|
||||||
|
}
|
||||||
|
data = {'data': json.dumps(data).encode('utf-8')}
|
||||||
|
else:
|
||||||
|
# CT24
|
||||||
|
is_live = True
|
||||||
|
lvp_url = self._search_regex(
|
||||||
|
r'(?s)<div[^>]+id=[\'"]live-video-player[\'"][^>]+data-url=[\'"]([^\'"]+)[\'"][^>]*>',
|
||||||
|
webpage, 'live video player', fatal=True)
|
||||||
|
lvp_hash = self._search_regex(
|
||||||
|
r'(?s)media_ivysilani: *{ *hash *: *[\'"]([0-9a-f]+)[\'"] *}',
|
||||||
|
webpage, 'live video hash', fatal=True)
|
||||||
|
lvp_url += '&hash=' + lvp_hash
|
||||||
|
webpage = self._download_webpage(unescapeHTML(lvp_url), playlist_id)
|
||||||
|
playlistpage = self._search_regex(
|
||||||
|
r'(?s)getPlaylistUrl\((\[[^\]]+\])[,\)]',
|
||||||
|
webpage, 'playlist params', fatal=True)
|
||||||
|
playlistpage_params = self._parse_json(playlistpage, playlist_id)[0]
|
||||||
|
playlistpage_url = 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/'
|
||||||
|
idec = playlistpage_params.get('id')
|
||||||
|
data = {
|
||||||
|
'playlist[0][type]': playlistpage_params.get('type'),
|
||||||
|
'playlist[0][id]': idec,
|
||||||
|
'requestUrl': '/ivysilani/embed/iFramePlayer.php',
|
||||||
|
'requestSource': 'iVysilani',
|
||||||
|
}
|
||||||
|
|
||||||
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||||
if f'{NOT_AVAILABLE_STRING}</p>' in webpage:
|
if f'{NOT_AVAILABLE_STRING}</p>' in webpage:
|
||||||
|
@ -133,40 +211,10 @@ def _real_extract(self, url):
|
||||||
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')):
|
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')):
|
||||||
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
|
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
|
||||||
|
|
||||||
type_ = None
|
|
||||||
episode_id = None
|
|
||||||
|
|
||||||
playlist = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
|
|
||||||
default='{}'), playlist_id)
|
|
||||||
if playlist:
|
|
||||||
type_ = playlist.get('type')
|
|
||||||
episode_id = playlist.get('id')
|
|
||||||
|
|
||||||
if not type_:
|
|
||||||
type_ = self._html_search_regex(
|
|
||||||
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
|
|
||||||
webpage, 'type')
|
|
||||||
if not episode_id:
|
|
||||||
episode_id = self._html_search_regex(
|
|
||||||
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
|
|
||||||
webpage, 'episode_id')
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'playlist[0][type]': type_,
|
|
||||||
'playlist[0][id]': episode_id,
|
|
||||||
'requestUrl': parsed_url.path,
|
|
||||||
'requestSource': 'iVysilani',
|
|
||||||
}
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for user_agent in (None, USER_AGENTS['Safari']):
|
for user_agent in (None, USER_AGENTS['Safari']):
|
||||||
req = Request(
|
req = Request(playlistpage_url, data=urlencode_postdata(data))
|
||||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
|
|
||||||
data=urlencode_postdata(data))
|
|
||||||
|
|
||||||
req.headers['Content-type'] = 'application/x-www-form-urlencoded'
|
req.headers['Content-type'] = 'application/x-www-form-urlencoded'
|
||||||
req.headers['x-addr'] = '127.0.0.1'
|
req.headers['x-addr'] = '127.0.0.1'
|
||||||
req.headers['X-Requested-With'] = 'XMLHttpRequest'
|
req.headers['X-Requested-With'] = 'XMLHttpRequest'
|
||||||
|
@ -179,25 +227,25 @@ def _real_extract(self, url):
|
||||||
if not playlistpage:
|
if not playlistpage:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
playlist_url = playlistpage['url']
|
playlist_url = playlistpage.get('url')
|
||||||
if playlist_url == 'error_region':
|
if playlist_url:
|
||||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
if playlist_url == 'error_region':
|
||||||
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
req = Request(urllib.parse.unquote(playlist_url))
|
||||||
|
req.headers['Referer'] = url
|
||||||
|
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||||
|
if not playlist:
|
||||||
|
continue
|
||||||
|
playlist = playlist.get('playlist')
|
||||||
|
else:
|
||||||
|
playlist = traverse_obj(playlistpage, ('RESULT', 'playlist'))
|
||||||
|
|
||||||
req = Request(urllib.parse.unquote(playlist_url))
|
|
||||||
req.headers['Referer'] = url
|
|
||||||
|
|
||||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
|
||||||
if not playlist:
|
|
||||||
continue
|
|
||||||
|
|
||||||
playlist = playlist.get('playlist')
|
|
||||||
if not isinstance(playlist, list):
|
if not isinstance(playlist, list):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
playlist_len = len(playlist)
|
playlist_len = len(playlist)
|
||||||
|
|
||||||
for num, item in enumerate(playlist):
|
for num, item in enumerate(playlist):
|
||||||
is_live = item.get('type') == 'LIVE'
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||||
if 'playerType=flash' in stream_url:
|
if 'playerType=flash' in stream_url:
|
||||||
|
@ -222,7 +270,7 @@ def _real_extract(self, url):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
item_id = str_or_none(item.get('id') or item['assetId'])
|
item_id = str_or_none(item.get('id') or item['assetId'])
|
||||||
title = item['title']
|
title = item.get('title') or 'live'
|
||||||
|
|
||||||
duration = float_or_none(item.get('duration'))
|
duration = float_or_none(item.get('duration'))
|
||||||
thumbnail = item.get('previewImageUrl')
|
thumbnail = item.get('previewImageUrl')
|
||||||
|
@ -231,7 +279,7 @@ def _real_extract(self, url):
|
||||||
if item.get('type') == 'VOD':
|
if item.get('type') == 'VOD':
|
||||||
subs = item.get('subtitles')
|
subs = item.get('subtitles')
|
||||||
if subs:
|
if subs:
|
||||||
subtitles = self.extract_subtitles(episode_id, subs)
|
subtitles = self.extract_subtitles(idec, subs)
|
||||||
|
|
||||||
if playlist_len == 1:
|
if playlist_len == 1:
|
||||||
final_title = playlist_title or title
|
final_title = playlist_title or title
|
||||||
|
@ -246,7 +294,7 @@ def _real_extract(self, url):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'is_live': is_live,
|
'live_status': 'is_live' if is_live else 'not_live',
|
||||||
})
|
})
|
||||||
|
|
||||||
if len(entries) == 1:
|
if len(entries) == 1:
|
||||||
|
|
|
@ -59,16 +59,15 @@ def _extract_from_api(self, video_id, tld):
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
}, fatal=False, impersonate=True) or {}
|
}, fatal=False, impersonate=True) or {}
|
||||||
|
|
||||||
status = response.get('room_status')
|
|
||||||
if status != 'public':
|
|
||||||
if error := self._ERROR_MAP.get(status):
|
|
||||||
raise ExtractorError(error, expected=True)
|
|
||||||
self.report_warning('Falling back to webpage extraction')
|
|
||||||
return None
|
|
||||||
|
|
||||||
m3u8_url = response.get('url')
|
m3u8_url = response.get('url')
|
||||||
if not m3u8_url:
|
if not m3u8_url:
|
||||||
self.raise_geo_restricted()
|
status = response.get('room_status')
|
||||||
|
if error := self._ERROR_MAP.get(status):
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
if status == 'public':
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction')
|
||||||
|
return None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user