Dealing with RTP latest changes

This commit is contained in:
vallovic 2021-02-19 14:10:10 +00:00
parent 40edffae3d
commit 35779eda7a

View File

@ -6,22 +6,27 @@ from ..utils import (
determine_ext, determine_ext,
js_to_json, js_to_json,
) )
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
import re
class RTPIE(InfoExtractor): class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(.*\/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', 'url': 'https://www.rtp.pt/play/p117/e476527/os-contemporaneos',
'md5': 'e736ce0c665e459ddb818546220b4ef8',
'info_dict': { 'info_dict': {
'id': 'e174042', 'id': 'e476527',
'ext': 'mp3', 'ext': 'mp4',
'title': 'Paixões Cruzadas', 'title': 'Os Contemporâneos Episódio 1 - RTP Play - RTP',
'description': 'As paixões musicais de António Cartaxo e António Macedo', 'description': 'Os Contemporâneos, um programa de humor com um olhar na sociedade portuguesa!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
}, },
}, { }, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'url': 'https://www.rtp.pt/play/p510/aleixo-fm',
'only_matching': True, 'only_matching': True,
}] }]
@ -29,30 +34,60 @@ class RTPIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_meta( title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
'twitter:title', webpage, display_name='title', fatal=True)
# Get JS object
js_object = self._search_regex(r'(?s)RTPPlayer *\( *({.+?}) *\);', webpage, 'player config')
json_string_for_config = ''
# Walk the JS object line by line: it isn't pure JSON, and some values may need decoding first
for line in js_object.splitlines():
stripped_line = line.strip()
# If JS object key is 'file'
if re.match('file ?:', stripped_line):
if 'decodeURIComponent' in stripped_line:
# 1) The HLS file URL is inside the object as an array of URL-quoted, base64-encoded string chunks...
hls_encoded = re.match(r"[^[]*\[([^]]*)\]", stripped_line).groups()[0]
hls_encoded = hls_encoded.replace('"', '').replace('\'', '').replace(',', '')
decoded_file_url = compat_b64decode(
compat_urllib_parse_unquote(
hls_encoded.replace('"', '').replace(',', ''))).decode('utf-8')
# Insert the decoded HLS file URL into the object string being rebuilt
json_string_for_config += '\nfile: "' + decoded_file_url + '",'
else:
# 2) ... or it's a direct M3U8 file
json_string_for_config += '\n' + line
elif not stripped_line.startswith("//") and not re.match('fileKey ?:', stripped_line):
# Ignore commented lines and 'fileKey' entry since it is no longer supported by RTP
json_string_for_config += '\n' + line
# Finally, pass the cleaned-up object string on for parsing (js_to_json converts it to JSON)
config = self._parse_json(json_string_for_config, video_id, js_to_json)
# config = self._parse_json(self._search_regex(
# r'(?s)RTPPlayer ?\( ?({.+?})\);', webpage,
# 'player config'), video_id, js_to_json)
config = self._parse_json(self._search_regex(
r'(?s)RTPPlayer\(({.+?})\);', webpage,
'player config'), video_id, js_to_json)
file_url = config['file'] file_url = config['file']
ext = determine_ext(file_url) ext = determine_ext(file_url)
if ext == 'm3u8': if ext == 'm3u8':
file_key = config.get('fileKey') # Download via m3u8 file
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', 'm3u8_native', file_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=file_key) m3u8_id='hls')
if file_key:
formats.append({
'url': 'https://cdn-ondemand.rtp.pt' + file_key,
'preference': 1,
})
self._sort_formats(formats) self._sort_formats(formats)
else: else:
formats = [{ formats = [{
'url': file_url, 'url': file_url,
'ext': ext, 'ext': ext,
}] }]
if config.get('mediaType') == 'audio': if config.get('mediaType') == 'audio':
for f in formats: for f in formats:
f['vcodec'] = 'none' f['vcodec'] = 'none'