mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 15:40:18 +01:00
2d49720f89
Authored-by: zmousm, dirkf
317 lines
13 KiB
Python
317 lines
13 KiB
Python
# coding: utf-8
|
||
from __future__ import unicode_literals
|
||
|
||
import json
|
||
import re
|
||
|
||
from .common import InfoExtractor
|
||
from ..compat import compat_str
|
||
from ..utils import (
|
||
clean_html,
|
||
determine_ext,
|
||
ExtractorError,
|
||
dict_get,
|
||
int_or_none,
|
||
merge_dicts,
|
||
parse_qs,
|
||
parse_age_limit,
|
||
parse_iso8601,
|
||
str_or_none,
|
||
try_get,
|
||
unescapeHTML,
|
||
url_or_none,
|
||
variadic,
|
||
)
|
||
|
||
|
||
class ERTFlixBaseIE(InfoExtractor):
|
||
def _call_api(
|
||
self, video_id, method='Player/AcquireContent', api_version=1,
|
||
param_headers=None, data=None, headers=None, **params):
|
||
platform_codename = {'platformCodename': 'www'}
|
||
headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
|
||
headers_as_param.update(param_headers or {})
|
||
headers = headers or {}
|
||
if data:
|
||
headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
|
||
data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
|
||
query = merge_dicts(
|
||
{} if data else platform_codename,
|
||
{'$headers': json.dumps(headers_as_param)},
|
||
params)
|
||
response = self._download_json(
|
||
'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
|
||
video_id, fatal=False, query=query, data=data, headers=headers)
|
||
if try_get(response, lambda x: x['Result']['Success']) is True:
|
||
return response
|
||
|
||
def _call_api_get_tiles(self, video_id, *tile_ids):
|
||
requested_tile_ids = [video_id] + list(tile_ids)
|
||
requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
|
||
tiles_response = self._call_api(
|
||
video_id, method='Tile/GetTiles', api_version=2,
|
||
data={'RequestedTiles': requested_tiles})
|
||
tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
|
||
if tile_ids:
|
||
if sorted([tile['Id'] for tile in tiles]) != sorted(requested_tile_ids):
|
||
raise ExtractorError('Requested tiles not found', video_id=video_id)
|
||
return tiles
|
||
try:
|
||
return next(tile for tile in tiles if tile['Id'] == video_id)
|
||
except StopIteration:
|
||
raise ExtractorError('No matching tile found', video_id=video_id)
|
||
|
||
|
||
class ERTFlixCodenameIE(ERTFlixBaseIE):
|
||
IE_NAME = 'ertflix:codename'
|
||
IE_DESC = 'ERTFLIX videos by codename'
|
||
_VALID_URL = r'ertflix:(?P<id>[\w-]+)'
|
||
_TESTS = [{
|
||
'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
|
||
'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
|
||
'info_dict': {
|
||
'id': 'monogramma-praxitelis-tzanoylinos',
|
||
'ext': 'mp4',
|
||
'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
|
||
},
|
||
},
|
||
]
|
||
|
||
def _extract_formats_and_subs(self, video_id, allow_none=True):
|
||
media_info = self._call_api(video_id, codename=video_id)
|
||
formats, subs = [], {}
|
||
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
|
||
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
|
||
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
|
||
if not fmt_url:
|
||
continue
|
||
ext = determine_ext(fmt_url)
|
||
if ext == 'm3u8':
|
||
formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
|
||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||
elif ext == 'mpd':
|
||
formats_, subs_ = self._extract_mpd_formats_and_subtitles(
|
||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||
else:
|
||
formats.append({
|
||
'url': fmt_url,
|
||
'format_id': str_or_none(media.get('Id')),
|
||
})
|
||
continue
|
||
formats.extend(formats_)
|
||
self._merge_subtitles(subs_, target=subs)
|
||
|
||
if formats or not allow_none:
|
||
self._sort_formats(formats)
|
||
return formats, subs
|
||
|
||
def _real_extract(self, url):
|
||
video_id = self._match_id(url)
|
||
|
||
formats, subs = self._extract_formats_and_subs(video_id)
|
||
|
||
if formats:
|
||
return {
|
||
'id': video_id,
|
||
'formats': formats,
|
||
'subtitles': subs,
|
||
'title': self._generic_title(url),
|
||
}
|
||
|
||
|
||
class ERTFlixIE(ERTFlixBaseIE):
|
||
IE_NAME = 'ertflix'
|
||
IE_DESC = 'ERTFLIX videos'
|
||
_VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
|
||
_TESTS = [{
|
||
'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
|
||
'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
|
||
'info_dict': {
|
||
'id': 'aoratoi-ergates',
|
||
'ext': 'mp4',
|
||
'title': 'md5:c1433d598fbba0211b0069021517f8b4',
|
||
'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
|
||
'thumbnail': r're:https?://.+\.jpg',
|
||
'episode_id': 'vod.173258',
|
||
'timestamp': 1639648800,
|
||
'upload_date': '20211216',
|
||
'duration': 3166,
|
||
'age_limit': 8,
|
||
},
|
||
}, {
|
||
'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
|
||
'info_dict': {
|
||
'id': 'ser.3448',
|
||
'age_limit': 8,
|
||
'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
|
||
'title': 'Μονόγραμμα',
|
||
},
|
||
'playlist_mincount': 64,
|
||
}, {
|
||
'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
|
||
'info_dict': {
|
||
'id': 'ser.3448',
|
||
'age_limit': 8,
|
||
'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
|
||
'title': 'Μονόγραμμα',
|
||
},
|
||
'playlist_count': 22,
|
||
}, {
|
||
'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
|
||
'info_dict': {
|
||
'id': 'ser.3448',
|
||
'age_limit': 8,
|
||
'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
|
||
'title': 'Μονόγραμμα',
|
||
},
|
||
'playlist_mincount': 36,
|
||
}, {
|
||
'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
|
||
'info_dict': {
|
||
'id': 'ser.164991',
|
||
'age_limit': 8,
|
||
'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
|
||
'title': 'Το δίκτυο',
|
||
},
|
||
'playlist_mincount': 9,
|
||
}]
|
||
|
||
def _extract_episode(self, episode):
|
||
codename = try_get(episode, lambda x: x['Codename'], compat_str)
|
||
title = episode.get('Title')
|
||
description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
|
||
if not codename or not title or not episode.get('HasPlayableStream', True):
|
||
return
|
||
thumbnail = next((
|
||
url_or_none(thumb.get('Url'))
|
||
for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
|
||
if thumb.get('IsMain')),
|
||
None)
|
||
return {
|
||
'_type': 'url_transparent',
|
||
'thumbnail': thumbnail,
|
||
'id': codename,
|
||
'episode_id': episode.get('Id'),
|
||
'title': title,
|
||
'alt_title': episode.get('Subtitle'),
|
||
'description': description,
|
||
'timestamp': parse_iso8601(episode.get('PublishDate')),
|
||
'duration': episode.get('DurationSeconds'),
|
||
'age_limit': self._parse_age_rating(episode),
|
||
'url': 'ertflix:%s' % (codename, ),
|
||
}
|
||
|
||
@staticmethod
|
||
def _parse_age_rating(info_dict):
|
||
return parse_age_limit(
|
||
info_dict.get('AgeRating')
|
||
or (info_dict.get('IsAdultContent') and 18)
|
||
or (info_dict.get('IsKidsContent') and 0))
|
||
|
||
def _extract_series(self, video_id, season_titles=None, season_numbers=None):
|
||
media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)
|
||
|
||
series = try_get(media_info, lambda x: x['Series'], dict) or {}
|
||
series_info = {
|
||
'age_limit': self._parse_age_rating(series),
|
||
'title': series.get('Title'),
|
||
'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
|
||
}
|
||
if season_numbers:
|
||
season_titles = season_titles or []
|
||
for season in try_get(series, lambda x: x['Seasons'], list) or []:
|
||
if season.get('SeasonNumber') in season_numbers and season.get('Title'):
|
||
season_titles.append(season['Title'])
|
||
|
||
def gen_episode(m_info, season_titles):
|
||
for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
|
||
if season_titles and episode_group.get('Title') not in season_titles:
|
||
continue
|
||
episodes = try_get(episode_group, lambda x: x['Episodes'], list)
|
||
if not episodes:
|
||
continue
|
||
season_info = {
|
||
'season': episode_group.get('Title'),
|
||
'season_number': int_or_none(episode_group.get('SeasonNumber')),
|
||
}
|
||
try:
|
||
episodes = [(int(ep['EpisodeNumber']), ep) for ep in episodes]
|
||
episodes.sort()
|
||
except (KeyError, ValueError):
|
||
episodes = enumerate(episodes, 1)
|
||
for n, episode in episodes:
|
||
info = self._extract_episode(episode)
|
||
if info is None:
|
||
continue
|
||
info['episode_number'] = n
|
||
info.update(season_info)
|
||
yield info
|
||
|
||
return self.playlist_result(
|
||
gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)
|
||
|
||
def _real_extract(self, url):
|
||
video_id = self._match_id(url)
|
||
if video_id.startswith('ser.'):
|
||
param_season = parse_qs(url).get('season', [None])
|
||
param_season = [
|
||
(have_number, int_or_none(v) if have_number else str_or_none(v))
|
||
for have_number, v in
|
||
[(int_or_none(ps) is not None, ps) for ps in param_season]
|
||
if v is not None
|
||
]
|
||
season_kwargs = {
|
||
k: [v for is_num, v in param_season if is_num is c] or None
|
||
for k, c in
|
||
[('season_titles', False), ('season_numbers', True)]
|
||
}
|
||
return self._extract_series(video_id, **season_kwargs)
|
||
|
||
return self._extract_episode(self._call_api_get_tiles(video_id))
|
||
|
||
|
||
class ERTWebtvEmbedIE(InfoExtractor):
|
||
IE_NAME = 'ertwebtv:embed'
|
||
IE_DESC = 'ert.gr webtv embedded videos'
|
||
_BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
|
||
_VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
|
||
|
||
_TESTS = [{
|
||
'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
|
||
'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
|
||
'info_dict': {
|
||
'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
|
||
'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
|
||
'ext': 'mp4',
|
||
'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
|
||
},
|
||
}]
|
||
|
||
@classmethod
|
||
def _extract_urls(cls, webpage):
|
||
EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
|
||
EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'
|
||
|
||
for mobj in re.finditer(EMBED_RE, webpage):
|
||
url = unescapeHTML(mobj.group('url'))
|
||
if not cls.suitable(url):
|
||
continue
|
||
yield url
|
||
|
||
def _real_extract(self, url):
|
||
video_id = self._match_id(url)
|
||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||
f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
|
||
video_id, 'mp4')
|
||
self._sort_formats(formats)
|
||
thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
|
||
if thumbnail_id and not thumbnail_id.startswith('http'):
|
||
thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
|
||
return {
|
||
'id': video_id,
|
||
'title': f'VOD - {video_id}',
|
||
'thumbnail': thumbnail_id,
|
||
'formats': formats,
|
||
'subtitles': subs,
|
||
}
|