From 6fc85f617a5850307fd5b258477070e6ee177796 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 23 Dec 2024 15:57:25 +0530 Subject: [PATCH 1/5] Don't sanitize filename on Unix when `--no-windows-filenames` (#9591) Closes #4547, Closes #8464 Authored by: pukkandan --- README.md | 3 +-- test/test_YoutubeDL.py | 7 +++++++ yt_dlp/YoutubeDL.py | 26 ++++++++++++++++++-------- yt_dlp/options.py | 4 ++-- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 1db4ed2a5d..f8c99ace40 100644 --- a/README.md +++ b/README.md @@ -613,8 +613,7 @@ ## Filesystem Options: --no-restrict-filenames Allow Unicode characters, "&" and spaces in filenames (default) --windows-filenames Force filenames to be Windows-compatible - --no-windows-filenames Make filenames Windows-compatible only if - using Windows (default) + --no-windows-filenames Sanitize filenames only minimally --trim-filenames LENGTH Limit the filename length (excluding extension) to the specified number of characters diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 966d27a498..6b022a7eaa 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -761,6 +761,13 @@ def test(tmpl, expected, *, info=None, **params): test('%(width)06d.%%(ext)s', 'NA.%(ext)s') test('%%(width)06d.%(ext)s', '%(width)06d.mp4') + # Sanitization options + test('%(title3)s', (None, 'foo⧸bar⧹test')) + test('%(title5)s', (None, 'aei_A'), restrictfilenames=True) + test('%(title3)s', (None, 'foo_bar_test'), windowsfilenames=False, restrictfilenames=True) + if sys.platform != 'win32': + test('%(title3)s', (None, 'foo⧸bar\\test'), windowsfilenames=False) + # ID sanitization test('%(id)s', '_abcd', info={'id': '_abcd'}) test('%(some_id)s', '_abcd', info={'some_id': '_abcd'}) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 65b72e026c..764baf3a00 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -266,7 +266,9 @@ class YoutubeDL: outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) - windowsfilenames: Force the filenames to be windows compatible + windowsfilenames: True: Force filenames to be Windows compatible + False: Sanitize filenames only minimally + This option has no effect when running on Windows ignoreerrors: Do not stop on download/postprocessing errors. Can be 'only_download' to ignore only download errors. Default is 'only_download' for CLI, but False for API @@ -1192,8 +1194,7 @@ def _copy_infodict(info_dict): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict - @param sanitize Whether to sanitize the output as a filename. - For backward compatibility, a function can also be passed + @param sanitize Whether to sanitize the output as a filename """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set @@ -1309,14 +1310,23 @@ def get_value(mdict): na = self.params.get('outtmpl_na_placeholder', 'NA') - def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): + def filename_sanitizer(key, value, restricted): return sanitize_filename(str(value), restricted=restricted, is_id=( bool(re.search(r'(^|[_.])id(\.|$)', key)) if 'filename-sanitization' in self.params['compat_opts'] else NO_DEFAULT)) - sanitizer = sanitize if callable(sanitize) else filename_sanitizer - sanitize = bool(sanitize) + if callable(sanitize): + self.deprecation_warning('Passing a callable "sanitize" to YoutubeDL.prepare_outtmpl is deprecated') + elif not sanitize: + pass + elif (sys.platform != 'win32' and not self.params.get('restrictfilenames') + and self.params.get('windowsfilenames') is False): + def sanitize(key, value): + return value.replace('/', '\u29F8').replace('\0', '') + else: + def sanitize(key, value): + return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')) def _dumpjson_default(obj): if isinstance(obj, (set, LazyList)): @@ -1399,13 +1409,13 @@ def create_key(outer_mobj): if sanitize: # If value is an object, sanitize might convert it to a string - # So we convert it to repr first + # So we manually convert it before sanitizing if fmt[-1] == 'r': value, fmt = repr(value), str_fmt elif fmt[-1] == 'a': value, fmt = ascii(value), str_fmt if fmt[-1] in 'csra': - value = sanitizer(last_field, value) + value = sanitize(last_field, value) key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 930d9d4bef..06b65e0eac 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1370,12 +1370,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): help='Allow Unicode characters, "&" and spaces in filenames (default)') filesystem.add_option( '--windows-filenames', - action='store_true', dest='windowsfilenames', default=False, + action='store_true', dest='windowsfilenames', default=None, help='Force filenames to be Windows-compatible') filesystem.add_option( '--no-windows-filenames', action='store_false', dest='windowsfilenames', - help='Make filenames Windows-compatible only if using Windows (default)') + help='Sanitize filenames only minimally') filesystem.add_option( '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default=0, type=int, From 9f42e68a74f3f00b0253fe70763abd57cac4237b Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 24 Dec 2024 12:03:28 +1300 Subject: [PATCH 2/5] [ie/youtube] Skip iOS formats that require PO Token (#11890) Partial fix for https://github.com/yt-dlp/yt-dlp/issues/11868 Authored by: coletdjnz --- README.md | 2 +- yt_dlp/extractor/youtube.py | 44 +++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index f8c99ace40..1c628d0257 100644 --- a/README.md +++ b/README.md @@ -1775,7 +1775,7 @@ #### youtube * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total -* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) +* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used * `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 0d3963116e..2638eaa5df 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -214,6 +214,7 @@ }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'REQUIRE_PO_TOKEN': True, 'REQUIRE_JS_PLAYER': False, }, # This client now requires sign-in for every video @@ -3973,13 +3974,10 @@ def append_client(*client_names): ) require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN') - if not po_token and require_po_token: + if not po_token and require_po_token and 'missing_pot' in self._configuration_arg('formats'): self.report_warning( f'No PO Token provided for {client} client, ' - f'which is required for working {client} formats. ' - f'You can manually pass a PO Token for this client with ' - f'--extractor-args "youtube:po_token={client}+XXX"', - only_once=True) + f'which may be required for working {client} formats. This client will be deprioritized', only_once=True) deprioritize_pr = True pr = initial_pr if client == 'web' else None @@ -4053,6 +4051,21 @@ def _needs_live_processing(self, live_status, duration): or (live_status == 'post_live' and (duration or 0) > 2 * 3600)): return live_status + def _report_pot_format_skipped(self, video_id, client_name, proto): + msg = ( + f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. ' + 'They will be skipped as they may yield HTTP Error 403. ' + f'You can manually pass a PO Token for this client with --extractor-args "youtube:po_token={client_name}+XXX. ' + 'For more information, refer to https://github.com/yt-dlp/yt-dlp/wiki/Extractors#po-token-guide . ' + 'To enable these broken formats anyway, pass --extractor-args "youtube:formats=missing_pot"') + + # Only raise a warning for non-default clients, to not confuse users. + # iOS HLS formats still work without PO Token, so we don't need to warn about them. + if client_name in (*self._DEFAULT_CLIENTS, *self._DEFAULT_AUTHED_CLIENTS): + self.write_debug(msg, only_once=True) + else: + self.report_warning(msg, only_once=True) + def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): CHUNK_SIZE = 10 << 20 PREFERRED_LANG_VALUE = 10 @@ -4179,11 +4192,10 @@ def build_fragments(f): fmt_url = update_url_query(fmt_url, {'pot': po_token}) # Clients that require PO Token return videoplayback URLs that may return 403 - is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) - if is_broken: - self.report_warning( - f'{video_id}: {client_name} client formats require a PO Token which was not provided. ' - 'They will be deprioritized as they may yield HTTP Error 403', only_once=True) + require_po_token = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) + if require_po_token and 'missing_pot' not in self._configuration_arg('formats'): + self._report_pot_format_skipped(video_id, client_name, 'https') + continue name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or '' fps = int_or_none(fmt.get('fps')) or 0 @@ -4196,7 +4208,7 @@ def build_fragments(f): name, fmt.get('isDrc') and 'DRC', try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), - is_damaged and 'DAMAGED', is_broken and 'BROKEN', + is_damaged and 'DAMAGED', require_po_token and 'MISSING POT', (self.get_param('verbose') or all_formats) and short_client_name(client_name), delim=', '), # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 @@ -4213,7 +4225,7 @@ def build_fragments(f): 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1, # Strictly de-prioritize broken, damaged and 3gp formats - 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, + 'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') @@ -4271,10 +4283,10 @@ def process_manifest_format(f, proto, client_name, itag, po_token): # Clients that require PO Token return videoplayback URLs that may return 403 # hls does not currently require PO Token if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls': - self.report_warning( - f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. ' - 'They will be deprioritized as they may yield HTTP Error 403', only_once=True) - f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ') + if 'missing_pot' not in self._configuration_arg('formats'): + self._report_pot_format_skipped(video_id, client_name, proto) + return False + f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') f['source_preference'] -= 20 if itag and all_formats: From 65cf46cddd873fd229dbb0fc0689bca4c201c6b6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:26:35 -0600 Subject: [PATCH 3/5] [ie/youtube] Player client maintenance (#11893) Closes #11867 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 2638eaa5df..33a93c5c9f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -162,7 +162,6 @@ 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, 'REQUIRE_AUTH': True, - 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'android_creator': { @@ -197,7 +196,6 @@ }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, 'REQUIRE_JS_PLAYER': False, - 'SUPPORTS_COOKIES': True, }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 @@ -233,7 +231,6 @@ 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'REQUIRE_JS_PLAYER': False, 'REQUIRE_AUTH': True, - 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'ios_creator': { @@ -4028,7 +4025,6 @@ def append_client(*client_names): f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) # web_creator can work around the age-verification requirement - # android_vr may also be able to work around age-verification # tv_embedded may(?) still work around age-verification if the video is embeddable append_client('web_creator') ''' From 3905f64920ed078d9eeb5640884f5854e01d744d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2024 23:47:20 +0000 Subject: [PATCH 4/5] Release 2024.12.23 Created by: bashonly :ci skip all --- Changelog.md | 16 ++++++++++++++++ yt_dlp/version.py | 6 +++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Changelog.md b/Changelog.md index 75e8240335..22a9a6e4bb 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,22 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.12.23 + +#### Core changes +- [Don't sanitize filename on Unix when `--no-windows-filenames`](https://github.com/yt-dlp/yt-dlp/commit/6fc85f617a5850307fd5b258477070e6ee177796) ([#9591](https://github.com/yt-dlp/yt-dlp/issues/9591)) by [pukkandan](https://github.com/pukkandan) +- **update** + - [Check 64-bitness when upgrading ARM builds](https://github.com/yt-dlp/yt-dlp/commit/b91c3925c2059970daa801cb131c0c2f4f302e72) ([#11819](https://github.com/yt-dlp/yt-dlp/issues/11819)) by [bashonly](https://github.com/bashonly) + - [Fix endless update loop for `linux_exe` builds](https://github.com/yt-dlp/yt-dlp/commit/3d3ee458c1fe49dd5ebd7651a092119d23eb7000) ([#11827](https://github.com/yt-dlp/yt-dlp/issues/11827)) by [bashonly](https://github.com/bashonly) + +#### Extractor changes +- **soundcloud**: [Various fixes](https://github.com/yt-dlp/yt-dlp/commit/d298693b1b266d198e8eeecb90ea17c4a031268f) ([#11820](https://github.com/yt-dlp/yt-dlp/issues/11820)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add age-gate workaround for some embeddable videos](https://github.com/yt-dlp/yt-dlp/commit/09a6c687126f04e243fcb105a828787efddd1030) ([#11821](https://github.com/yt-dlp/yt-dlp/issues/11821)) by [bashonly](https://github.com/bashonly) + - [Fix `uploader_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/1a8851b689763e5173b96f70f8a71df0e4a44b66) ([#11818](https://github.com/yt-dlp/yt-dlp/issues/11818)) by [bashonly](https://github.com/bashonly) + - [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/65cf46cddd873fd229dbb0fc0689bca4c201c6b6) ([#11893](https://github.com/yt-dlp/yt-dlp/issues/11893)) by [bashonly](https://github.com/bashonly) + - [Skip iOS formats that require PO Token](https://github.com/yt-dlp/yt-dlp/commit/9f42e68a74f3f00b0253fe70763abd57cac4237b) ([#11890](https://github.com/yt-dlp/yt-dlp/issues/11890)) by [coletdjnz](https://github.com/coletdjnz) + ### 2024.12.13 #### Extractor changes diff --git a/yt_dlp/version.py b/yt_dlp/version.py index f696e1e9d0..1ff43c611f 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.12.13' +__version__ = '2024.12.23' -RELEASE_GIT_HEAD = '54216696261bc07cacd9a837c501d9e0b7fed09e' +RELEASE_GIT_HEAD = '65cf46cddd873fd229dbb0fc0689bca4c201c6b6' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.12.13' +_pkg_version = '2024.12.23' From 0b6b7742c2e7f2a1fcb0b54ef3dd484bab404b3f Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Thu, 26 Dec 2024 14:19:17 +1300 Subject: [PATCH 5/5] [ie/youtube] Fix DASH formats incorrectly skipped in some situations (#11910) Closes https://github.com/yt-dlp/yt-dlp/issues/11907 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 33a93c5c9f..1e83e41b8f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -256,6 +256,7 @@ 'client': { 'clientName': 'MWEB', 'clientVersion': '2.20241202.07.00', + # mweb does not require PO Token with this UA 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', }, }, @@ -4051,7 +4052,7 @@ def _report_pot_format_skipped(self, video_id, client_name, proto): msg = ( f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. ' 'They will be skipped as they may yield HTTP Error 403. ' - f'You can manually pass a PO Token for this client with --extractor-args "youtube:po_token={client_name}+XXX. ' + f'You can manually pass a PO Token for this client with --extractor-args "youtube:po_token={client_name}+XXX". ' 'For more information, refer to https://github.com/yt-dlp/yt-dlp/wiki/Extractors#po-token-guide . ' 'To enable these broken formats anyway, pass --extractor-args "youtube:formats=missing_pot"') @@ -4271,7 +4272,6 @@ def process_manifest_format(f, proto, client_name, itag, po_token): key = (proto, f.get('language')) if not all_formats and key in itags[itag]: return False - itags[itag].add(key) if f.get('source_preference') is None: f['source_preference'] = -1 @@ -4285,6 +4285,8 @@ def process_manifest_format(f, proto, client_name, itag, po_token): f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') f['source_preference'] -= 20 + itags[itag].add(key) + if itag and all_formats: f['format_id'] = f'{itag}-{proto}' elif any(p != proto for p, _ in itags[itag]):