merge 'master'

This commit is contained in:
Mozi 2024-10-30 17:37:36 +00:00
commit 7f94f2fceb
12 changed files with 312 additions and 176 deletions

View File

@ -216,5 +216,23 @@
"action": "add", "action": "add",
"when": "d784464399b600ba9516bbcec6286f11d68974dd", "when": "d784464399b600ba9516bbcec6286f11d68974dd",
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)" "short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
},
{
"action": "change",
"when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
"short": "Expand paths in `--plugin-dirs` (#11334)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
"short": "[ie/generic] Do not impersonate by default (#11336)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
"short": "[ie/vimeo] Fix API retries (#11351)",
"authors": ["bashonly"]
} }
] ]

View File

@ -71,14 +71,13 @@ def group_lookup(cls):
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
group, _, subgroup = (group.strip().lower() for group in value.partition('/')) group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
result = cls.group_lookup().get(group) if result := cls.group_lookup().get(group):
if not result: return result, subgroup or None
if subgroup: if subgroup:
return None, value return None, value
subgroup = group
result = cls.subgroup_lookup().get(subgroup)
return result, subgroup or None return cls.subgroup_lookup().get(group), group or None
@dataclass @dataclass
@ -136,8 +135,7 @@ def _format_groups(self, groups):
first = False first = False
yield '\n<details><summary><h3>Changelog</h3></summary>\n' yield '\n<details><summary><h3>Changelog</h3></summary>\n'
group = groups[item] if group := groups[item]:
if group:
yield self.format_module(item.value, group) yield self.format_module(item.value, group)
if self._collapsible: if self._collapsible:
@ -253,7 +251,7 @@ class CommitRange:
''', re.VERBOSE | re.DOTALL) ''', re.VERBOSE | re.DOTALL)
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})') FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
def __init__(self, start, end, default_author=None): def __init__(self, start, end, default_author=None):
@ -287,11 +285,16 @@ def _get_commits_and_fixes(self, default_author):
short = next(lines) short = next(lines)
skip = short.startswith('Release ') or short == '[version] update' skip = short.startswith('Release ') or short == '[version] update'
fix_commitish = None
if match := self.FIXES_RE.search(short):
fix_commitish = match.group(1)
authors = [default_author] if default_author else [] authors = [default_author] if default_author else []
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
match = self.AUTHOR_INDICATOR_RE.match(line) if match := self.AUTHOR_INDICATOR_RE.match(line):
if match:
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
fix_commitish = match.group(1)
commit = Commit(commit_hash, short, authors) commit = Commit(commit_hash, short, authors)
if skip and (self._start or not i): if skip and (self._start or not i):
@ -301,21 +304,17 @@ def _get_commits_and_fixes(self, default_author):
logger.debug(f'Reached Release commit, breaking: {commit}') logger.debug(f'Reached Release commit, breaking: {commit}')
break break
revert_match = self.REVERT_RE.fullmatch(commit.short) if match := self.REVERT_RE.fullmatch(commit.short):
if revert_match: reverts[match.group(1)] = commit
reverts[revert_match.group(1)] = commit
continue continue
fix_match = self.FIXES_RE.search(commit.short) if fix_commitish:
if fix_match: fixes[fix_commitish].append(commit)
commitish = fix_match.group(1)
fixes[commitish].append(commit)
commits[commit.hash] = commit commits[commit.hash] = commit
for commitish, revert_commit in reverts.items(): for commitish, revert_commit in reverts.items():
reverted = commits.pop(commitish, None) if reverted := commits.pop(commitish, None):
if reverted:
logger.debug(f'{commitish} fully reverted {reverted}') logger.debug(f'{commitish} fully reverted {reverted}')
else: else:
commits[revert_commit.hash] = revert_commit commits[revert_commit.hash] = revert_commit
@ -461,8 +460,7 @@ def create_changelog(args):
logger.info(f'Loaded {len(commits)} commits') logger.info(f'Loaded {len(commits)} commits')
new_contributors = get_new_contributors(args.contributors_path, commits) if new_contributors := get_new_contributors(args.contributors_path, commits):
if new_contributors:
if args.contributors: if args.contributors:
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
logger.info(f'New contributors: {", ".join(new_contributors)}') logger.info(f'New contributors: {", ".join(new_contributors)}')

View File

@ -2849,13 +2849,10 @@ def is_wellformed(f):
sanitize_string_field(fmt, 'format_id') sanitize_string_field(fmt, 'format_id')
sanitize_numeric_fields(fmt) sanitize_numeric_fields(fmt)
fmt['url'] = sanitize_url(fmt['url']) fmt['url'] = sanitize_url(fmt['url'])
if fmt.get('ext') is None: FormatSorter._fill_sorting_fields(fmt)
fmt['ext'] = determine_ext(fmt['url']).lower()
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if fmt.get('acodec') is None: if fmt.get('acodec') is None:
fmt['acodec'] = fmt['ext'] fmt['acodec'] = fmt['ext']
if fmt.get('protocol') is None:
fmt['protocol'] = determine_protocol(fmt)
if fmt.get('resolution') is None: if fmt.get('resolution') is None:
fmt['resolution'] = self.format_resolution(fmt, default=None) fmt['resolution'] = self.format_resolution(fmt, default=None)
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':

View File

@ -401,8 +401,6 @@
from .cnbc import CNBCVideoIE from .cnbc import CNBCVideoIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNArticleIE,
CNNBlogsIE,
CNNIndonesiaIE, CNNIndonesiaIE,
) )
from .comedycentral import ( from .comedycentral import (

View File

@ -12,53 +12,86 @@
class CCMAIE(InfoExtractor): class CCMAIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)' IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', # ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
'md5': '7296ca43977c8ea4469e719c609b0871', 'md5': '7296ca43977c8ea4469e719c609b0871',
'info_dict': { 'info_dict': {
'id': '5630208', 'id': '5630208',
'ext': 'mp4', 'ext': 'mp4',
'title': 'L\'espot de La Marató de TV3', 'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765', 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1478608140, 'timestamp': 1478608140,
'upload_date': '20161108', 'upload_date': '20161108',
'age_limit': 0, 'age_limit': 0,
'alt_title': 'EsportMarató2016WEB_PerPublicar',
'duration': 79,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
'categories': ['Divulgació'],
}, },
}, { }, {
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', # ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'md5': 'fa3e38f269329a278271276330261425', 'md5': 'fa3e38f269329a278271276330261425',
'info_dict': { 'info_dict': {
'id': '943685', 'id': '943685',
'ext': 'mp3', 'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi', 'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20170512', 'upload_date': '20161217',
'timestamp': 1494622500, 'timestamp': 1482011700,
'vcodec': 'none', 'vcodec': 'none',
'categories': ['Esports'], 'categories': ['Esports'],
'series': 'Tot gira',
'duration': 821,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
}, },
}, { }, {
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', 'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
'md5': 'b43c3d3486f430f3032b5b160d80cbc3', 'md5': '27493513d08a3e5605814aee9bb778d2',
'info_dict': { 'info_dict': {
'id': '6031387', 'id': '6031387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)', 'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
'timestamp': 1582577700, 'timestamp': 1582577919,
'upload_date': '20200224', 'upload_date': '20200224',
'subtitles': 'mincount:4', 'subtitles': 'mincount:1',
'age_limit': 16, 'age_limit': 13,
'series': 'Crims', 'series': 'Crims',
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
'duration': 3203,
'categories': ['Divulgació'],
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
'episode_number': 5,
'episode': 'Episode 5',
},
}, {
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
'info_dict': {
'id': '5759227',
'ext': 'mp4',
'title': 'Una mosca volava per la llum',
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
'description': 'md5:9ab64276944b0825336f4147f13f7854',
'series': 'Mic',
'upload_date': '20180411',
'timestamp': 1523440105,
'duration': 160,
'age_limit': 0,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
'categories': ['Música'],
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
media_type, media_id = self._match_valid_url(url).groups() media_type, media_id = self._match_valid_url(url).group('type', 'id')
media = self._download_json( media = self._download_json(
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
'media': media_type, 'media': media_type,
'idint': media_id, 'idint': media_id,
'format': 'dm', 'format': 'dm',

View File

@ -1,146 +1,226 @@
import functools
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from .turner import TurnerBaseIE from ..utils import (
from ..utils import merge_dicts, try_call, url_basename clean_html,
extract_attributes,
int_or_none,
merge_dicts,
parse_duration,
parse_iso8601,
parse_resolution,
try_call,
update_url,
url_or_none,
)
from ..utils.traversal import find_elements, traverse_obj
class CNNIE(TurnerBaseIE): class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ _VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
_TESTS = [{ _TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': { 'info_dict': {
'id': 'sports/2013/06/09/nadal-1-on-1.cnn', 'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nadal wins 8th French Open title', 'upload_date': '20240531',
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
'duration': 135, 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
'upload_date': '20130609', 'duration': 373.0,
'timestamp': 1717148586,
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
'modified_date': '20240531',
'modified_timestamp': 1717150140,
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', 'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
'info_dict': { 'info_dict': {
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'ext': 'mp4', 'ext': 'mp4',
'title': "Student's epic speech stuns new freshmen", 'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."', 'title': 'Heres how some inmates in closely divided state are now able to vote from jail',
'upload_date': '20130821', 'timestamp': 1718158269,
'upload_date': '20240612',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
'duration': 202.0,
'modified_date': '20240612',
'modified_timestamp': 1718158509,
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', 'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'info_dict': { 'info_dict': {
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', 'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards', 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
'description': 'md5:e7223a503315c9f150acac52e76de086', 'description': 'md5:19f78338ccec533db0fa8a4511012dae',
'upload_date': '20141222', 'title': 'Video shows King Charles\' portrait being vandalized by activists',
'timestamp': 1718113852,
'upload_date': '20240611',
'duration': 51.0,
'modified_timestamp': 1718116193,
'modified_date': '20240611',
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', 'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
'info_dict': { 'info_dict': {
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', 'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'ext': 'mp4', 'ext': 'mp4',
'title': '5 stunning stats about Netflix', 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', 'duration': 158.0,
'upload_date': '20160819', 'title': 'Robin Meade signs off after HLN\'s last broadcast',
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
'upload_date': '20221205',
'timestamp': 1670284296,
'modified_timestamp': 1670332404,
'modified_date': '20221206',
}, },
'params': { 'params': {'format': 'direct'},
# m3u8 download }, {
'skip_download': True, 'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
'info_dict': {
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
'ext': 'mp4',
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
'timestamp': 1729501452,
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
'description': 'md5:256ee7137d161f776cda429654135e52',
'upload_date': '20241021',
'duration': 31.0,
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
'modified_date': '20241021',
'modified_timestamp': 1729501530,
}, },
}, { }, {
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
'only_matching': True, 'info_dict': {
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
},
'playlist_count': 2,
'playlist': [{
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
'info_dict': {
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
'ext': 'mp4',
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
'duration': 173.0,
'timestamp': 1729122182,
'upload_date': '20241016',
'modified_timestamp': 1729194706,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}, { }, {
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', 'md5': '11604ab4af83b650826753f1ccb8ecff',
'only_matching': True, 'info_dict': {
}, { 'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', 'ext': 'mp4',
'only_matching': True, 'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
'duration': 145.0,
'timestamp': 1729137765,
'upload_date': '20241017',
'modified_timestamp': 1729138184,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}],
}] }]
_CONFIG = {
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
'edition': {
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
},
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
'money': {
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
'media_src': 'http://ht3.cdn.turner.com/money/big',
},
}
def _extract_timestamp(self, video_data):
# TODO: fix timestamp extraction
return None
def _real_extract(self, url): def _real_extract(self, url):
sub_domain, path, page_title = self._match_valid_url(url).groups() display_id = self._match_valid_url(url).group('display_id')
if sub_domain not in ('money', 'edition'): webpage = self._download_webpage(url, display_id)
sub_domain = 'edition' app_id = traverse_obj(
config = self._CONFIG[sub_domain] self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
return self._extract_cvp_info( ('TOP_AUTH_SERVICE_APP_ID', {str}))
config['data_src'] % path, page_title, {
'default': { entries = []
'media_src': config['media_src'], for player_data in traverse_obj(webpage, (
}, {find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
'f4m': { ..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
'host': 'cnn-vh.akamaihd.net', media_id = player_data['data-media-id']
}, parent_uri = player_data.get('data-video-resource-parent-uri')
formats, subtitles = [], {}
video_data = {}
if parent_uri:
video_data = self._download_json(
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
query={
'id': media_id,
'stellarUri': parent_uri,
})
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
resolution, bitrate = None, None
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
resolution, bitrate = mobj.group('res', 'tbr')
formats.append({
'url': direct_url,
'format_id': 'direct',
'quality': 1,
'tbr': int_or_none(bitrate),
**parse_resolution(resolution),
})
for sub_data in traverse_obj(video_data, (
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
'url': sub_data['url'],
'name': sub_data.get('label'),
}) })
if app_id:
media_data = self._download_json(
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
query={'appId': app_id})
m3u8_url = traverse_obj(media_data, (
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
if m3u8_url:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
class CNNBlogsIE(InfoExtractor): entries.append({
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+' **traverse_obj(player_data, {
_TEST = { 'title': ('data-headline', {clean_html}),
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', 'description': ('data-description', {clean_html}),
'md5': '3e56f97b0b6ffb4b79f4ea0749551084', 'duration': ('data-duration', {parse_duration}),
'info_dict': { 'timestamp': ('data-publish-date', {parse_iso8601}),
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', 'thumbnail': (
'ext': 'mp4', 'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
'title': 'Criminalizing journalism?', {functools.partial(update_url, query='c=original')}),
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', 'display_id': 'data-video-slug',
'upload_date': '20140209', }),
}, **traverse_obj(video_data, {
'expected_warnings': ['Failed to download m3u8 information'], 'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
'add_ie': ['CNN'], 'description': ('description', {clean_html}),
'title': ('headline', {str}),
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
'duration': ('trt', {int_or_none}),
}),
'id': media_id,
'formats': formats,
'subtitles': subtitles,
})
if len(entries) == 1:
return {
**entries[0],
'display_id': display_id,
} }
def _real_extract(self, url): return self.playlist_result(entries, display_id)
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
return self.url_result(cnn_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
_TEST = {
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
'info_dict': {
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
'title': 'Obama: Cyberattack not an act of war',
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
'upload_date': '20141221',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
class CNNIndonesiaIE(InfoExtractor): class CNNIndonesiaIE(InfoExtractor):

View File

@ -47,6 +47,7 @@
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
ISO639Utils,
LenientJSONDecoder, LenientJSONDecoder,
Popen, Popen,
RegexNotFoundError, RegexNotFoundError,
@ -3071,7 +3072,11 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
url_pattern = stream.attrib['Url'] url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name') stream_name = stream.get('Name')
stream_language = stream.get('Language', 'und') # IsmFD expects ISO 639 Set 2 language codes (3-character length)
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
stream_language = stream.get('Language') or 'und'
if len(stream_language) != 3:
stream_language = ISO639Utils.short2long(stream_language) or 'und'
for track in stream.findall('QualityLevel'): for track in stream.findall('QualityLevel'):
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'} KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag')) fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))

View File

@ -869,7 +869,7 @@ def _real_extract(self, url):
class NiconicoUserIE(InfoExtractor): class NiconicoUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])' _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'https://www.nicovideo.jp/user/419948', 'url': 'https://www.nicovideo.jp/user/419948',
'info_dict': { 'info_dict': {
@ -877,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
}, },
'playlist_mincount': 101, 'playlist_mincount': 101,
} }
_API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s' _API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
_PAGE_SIZE = 100 _PAGE_SIZE = 100
_API_HEADERS = { _API_HEADERS = {
@ -897,12 +897,13 @@ def _entries(self, list_id):
total_count = int_or_none(json_parsed['data'].get('totalCount')) total_count = int_or_none(json_parsed['data'].get('totalCount'))
for entry in json_parsed['data']['items']: for entry in json_parsed['data']['items']:
count += 1 count += 1
yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id'])) yield self.url_result(
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
page_num += 1 page_num += 1
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key()) return self.playlist_result(self._entries(list_id), list_id)
class NiconicoLiveIE(InfoExtractor): class NiconicoLiveIE(InfoExtractor):

View File

@ -208,7 +208,6 @@ def sign(self, user, pw, clid):
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False): def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
track_id = str(info['id']) track_id = str(info['id'])
title = info['title']
format_urls = set() format_urls = set()
formats = [] formats = []
@ -367,7 +366,7 @@ def extract_count(key):
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'), 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
'uploader_url': user.get('permalink_url'), 'uploader_url': user.get('permalink_url'),
'timestamp': unified_timestamp(info.get('created_at')), 'timestamp': unified_timestamp(info.get('created_at')),
'title': title, 'title': info.get('title'),
'description': info.get('description'), 'description': info.get('description'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(info.get('duration'), 1000), 'duration': float_or_none(info.get('duration'), 1000),
@ -377,7 +376,8 @@ def extract_count(key):
'like_count': extract_count('favoritings') or extract_count('likes'), 'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'), 'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'), 'repost_count': extract_count('reposts'),
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)), 'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
'formats': formats if not extract_flat else None, 'formats': formats if not extract_flat else None,
} }
@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
'uploader_url': 'https://soundcloud.com/ethmusic', 'uploader_url': 'https://soundcloud.com/ethmusic',
'genres': [],
}, },
}, },
# geo-restricted # geo-restricted
@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_url': 'https://soundcloud.com/the-concept-band', 'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
'genres': ['Alternative'], 'genres': ['Alternative'],
'artists': ['The Royal Concept'],
}, },
}, },
# private link # private link
@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int, 'repost_count': int,
'view_count': int, 'view_count': int,
'genres': ['Dance & EDM'], 'genres': ['Dance & EDM'],
'artists': ['80M'],
}, },
}, },
# private link, downloadable format # private link, downloadable format
@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg', 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
'uploader_url': 'https://soundcloud.com/oriuplift', 'uploader_url': 'https://soundcloud.com/oriuplift',
'genres': ['Trance'], 'genres': ['Trance'],
'artists': ['Ori Uplift'],
}, },
}, },
# no album art, use avatar pic for thumbnail # no album art, use avatar pic for thumbnail
@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'uploader_url': 'https://soundcloud.com/garyvee', 'uploader_url': 'https://soundcloud.com/garyvee',
'genres': [], 'artists': ['MadReal'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View File

@ -869,11 +869,12 @@ def _extract_from_api(self, video_id, unlisted_hash=None):
for retry in (False, True): for retry in (False, True):
try: try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash) video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
break
except ExtractorError as e: except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
and 'password' in traverse_obj( and 'password' in traverse_obj(
e.cause.response.read(), self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'), ({json.loads}, 'invalid_parameters', ..., 'field'),
)): )):
self._verify_video_password( self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft']) video_id, self._get_video_password(), viewer['xsrft'])

View File

@ -5165,6 +5165,7 @@ class _UnsafeExtensionError(Exception):
'ico', 'ico',
'image', 'image',
'jng', 'jng',
'jpe',
'jpeg', 'jpeg',
'jxl', 'jxl',
'svg', 'svg',
@ -5578,14 +5579,15 @@ def _calculate_field_preference(self, format_, field):
value = get_value(field) value = get_value(field)
return self._calculate_field_preference_from_value(format_, field, type_, value) return self._calculate_field_preference_from_value(format_, field, type_, value)
def calculate_preference(self, format): @staticmethod
def _fill_sorting_fields(format):
# Determine missing protocol # Determine missing protocol
if not format.get('protocol'): if not format.get('protocol'):
format['protocol'] = determine_protocol(format) format['protocol'] = determine_protocol(format)
# Determine missing ext # Determine missing ext
if not format.get('ext') and 'url' in format: if not format.get('ext') and 'url' in format:
format['ext'] = determine_ext(format['url']) format['ext'] = determine_ext(format['url']).lower()
if format.get('vcodec') == 'none': if format.get('vcodec') == 'none':
format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none' format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
format['video_ext'] = 'none' format['video_ext'] = 'none'
@ -5613,6 +5615,8 @@ def calculate_preference(self, format):
if not format.get('tbr'): if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
def calculate_preference(self, format):
self._fill_sorting_fields(format)
return tuple(self._calculate_field_preference(format, field) for field in self._order) return tuple(self._calculate_field_preference(format, field) for field in self._order)

View File

@ -391,14 +391,13 @@ def find_element(*, tag: str, html=False): ...
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False): def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
# deliberately using `id=` and `cls=` for ease of readability # deliberately using `id=` and `cls=` for ease of readability
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required' assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
if not tag: ANY_TAG = r'[\w:.-]+'
tag = r'[\w:.-]+'
if attr and value: if attr and value:
assert not cls, 'Cannot match both attr and cls' assert not cls, 'Cannot match both attr and cls'
assert not id, 'Cannot match both attr and id' assert not id, 'Cannot match both attr and id'
func = get_element_html_by_attribute if html else get_element_by_attribute func = get_element_html_by_attribute if html else get_element_by_attribute
return functools.partial(func, attr, value, tag=tag) return functools.partial(func, attr, value, tag=tag or ANY_TAG)
elif cls: elif cls:
assert not id, 'Cannot match both cls and id' assert not id, 'Cannot match both cls and id'
@ -408,7 +407,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
elif id: elif id:
func = get_element_html_by_id if html else get_element_by_id func = get_element_html_by_id if html else get_element_by_id
return functools.partial(func, id, tag=tag) return functools.partial(func, id, tag=tag or ANY_TAG)
index = int(bool(html)) index = int(bool(html))
return lambda html: get_element_text_and_html_by_tag(tag, html)[index] return lambda html: get_element_text_and_html_by_tag(tag, html)[index]