[ie/CBCPlayerPlaylist] Add extractor (#7870)

Authored by: trainman261
This commit is contained in:
trainman261 2023-08-20 18:35:57 +02:00 committed by GitHub
parent a0de8bb860
commit ed71189781
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 0 deletions

View File

@ -303,6 +303,7 @@
from .cbc import ( from .cbc import (
CBCIE, CBCIE,
CBCPlayerIE, CBCPlayerIE,
CBCPlayerPlaylistIE,
CBCGemIE, CBCGemIE,
CBCGemPlaylistIE, CBCGemPlaylistIE,
CBCGemLiveIE, CBCGemLiveIE,

View File

@ -2,6 +2,7 @@
import json import json
import base64 import base64
import time import time
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -228,6 +229,38 @@ def _real_extract(self, url):
} }
class CBCPlayerPlaylistIE(InfoExtractor):
    """Extract the clips listed on a cbc.ca/player category page as a playlist.

    Matches any cbc.ca/player/ URL whose path does not start with ``play/``.
    """
    IE_NAME = 'cbc.ca:player:playlist'
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
    _TESTS = [{
        'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'news/tv shows/the national/latest broadcast',
        }
    }, {
        'url': 'https://www.cbc.ca/player/news/Canada/North',
        'playlist_mincount': 25,
        'info_dict': {
            'id': 'news/canada/north',
        }
    }]

    def _real_extract(self, url):
        """Build a playlist result from the page's embedded initial state.

        The playlist id is the URL path after ``/player/``, percent-decoded,
        lowercased and without trailing slashes. Each clip id found under the
        matching ``clipsByCategory`` entry is delegated to ``CBCPlayerIE``.
        """
        # _VALID_URL captures everything up to '?' or '#', so a URL written
        # with a trailing slash would otherwise produce an id ending in '/'
        # that can never equal a category key below (empty playlist).
        playlist_id = urllib.parse.unquote(self._match_id(url)).lower().rstrip('/')
        webpage = self._download_webpage(url, playlist_id)
        json_content = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', playlist_id)

        def entries():
            # Category keys are compared case-insensitively against the
            # lowercased playlist id.
            for video_id in traverse_obj(json_content, (
                'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id'
            )):
                yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE)

        return self.playlist_result(entries(), playlist_id)
class CBCGemIE(InfoExtractor): class CBCGemIE(InfoExtractor):
IE_NAME = 'gem.cbc.ca' IE_NAME = 'gem.cbc.ca'
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'