From fe2ce85aff0aa03735fc0152bb8cb9c3d4ef0753 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 4 Mar 2023 01:13:05 +0530 Subject: [PATCH] Add option `--break-match-filters` * Deprecates `--break-on-reject` Closes #5962 --- README.md | 10 +++++---- yt_dlp/YoutubeDL.py | 50 ++++++++++++++++++++++++++++++--------------- yt_dlp/__init__.py | 2 +- yt_dlp/options.py | 16 +++++++++++---- yt_dlp/utils.py | 15 +++++++++----- 5 files changed, 62 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 045521b802..b490594847 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ # NEW FEATURES * **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata` -* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc +* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc * **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc @@ -519,7 +519,10 @@ ## Video Selection: dogs" (caseless). 
Use "--match-filter -" to interactively ask whether to download each video - --no-match-filter Do not use generic video filter (default) + --no-match-filter Do not use any --match-filter (default) + --break-match-filters FILTER Same as "--match-filters" but stops the + download process when a video is rejected + --no-break-match-filters Do not use any --break-match-filters (default) --no-playlist Download only the video, if the URL refers to a video and a playlist --yes-playlist Download the playlist, if the URL refers to @@ -533,8 +536,6 @@ ## Video Selection: --max-downloads NUMBER Abort after downloading NUMBER files --break-on-existing Stop the download process when encountering a file that is in the archive - --break-on-reject Stop the download process when encountering - a file that has been filtered out --break-per-input Alters --max-downloads, --break-on-existing, --break-on-reject, and autonumber to reset per input URL @@ -2133,6 +2134,7 @@ #### Redundant options --reject-title REGEX --match-filter "title !~= (?i)REGEX" --min-views COUNT --match-filter "view_count >=? COUNT" --max-views COUNT --match-filter "view_count <=? COUNT" + --break-on-reject Use --break-match-filters --user-agent UA --add-header "User-Agent:UA" --referer URL --add-header "Referer:URL" --playlist-start NUMBER -I NUMBER: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1b77e12b87..5d21b43cf7 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -300,8 +300,6 @@ class YoutubeDL: Videos already present in the file are not downloaded again. break_on_existing: Stop the download process after attempting to download a file that is in the archive. - break_on_reject: Stop the download process when encountering a video that - has been filtered out. 
break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue cookiefile: File name or text stream from where cookies should be read and dumped to @@ -414,6 +412,8 @@ class YoutubeDL: - If it returns None, the video is downloaded. - If it returns utils.NO_DEFAULT, the user is interactively asked whether to download the video. + - Raise utils.DownloadCancelled(msg) to abort remaining + downloads when a video is rejected. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For @@ -483,6 +483,9 @@ class YoutubeDL: The following options are deprecated and may be removed in the future: + break_on_reject: Stop the download process when encountering a video that + has been filtered out. + - `raise DownloadCancelled(msg)` in match_filter instead force_generic_extractor: Force downloader to use the generic extractor - Use allowed_extractors = ['generic', 'default'] playliststart: - Use playlist_items @@ -1407,31 +1410,44 @@ def check_filter(): return 'Skipping "%s" because it is age restricted' % video_title match_filter = self.params.get('match_filter') - if match_filter is not None: + if match_filter is None: + return None + + cancelled = None + try: try: ret = match_filter(info_dict, incomplete=incomplete) except TypeError: # For backward compatibility ret = None if incomplete else match_filter(info_dict) - if ret is NO_DEFAULT: - while True: - filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) - reply = input(self._format_screen( - f'Download "{filename}"? 
(Y/n): ', self.Styles.EMPHASIS)).lower().strip() - if reply in {'y', ''}: - return None - elif reply == 'n': - return f'Skipping {video_title}' - elif ret is not None: - return ret - return None + except DownloadCancelled as err: + if err.msg is not NO_DEFAULT: + raise + ret, cancelled = err.msg, err + + if ret is NO_DEFAULT: + while True: + filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) + reply = input(self._format_screen( + f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip() + if reply in {'y', ''}: + return None + elif reply == 'n': + if cancelled: + raise type(cancelled)(f'Skipping {video_title}') + return f'Skipping {video_title}' + return ret if self.in_download_archive(info_dict): reason = '%s has already been recorded in the archive' % video_title break_opt, break_err = 'break_on_existing', ExistingVideoReached else: - reason = check_filter() - break_opt, break_err = 'break_on_reject', RejectedVideoReached + try: + reason = check_filter() + except DownloadCancelled as e: + reason, break_opt, break_err = e.msg, 'match_filter', type(e) + else: + break_opt, break_err = 'break_on_reject', RejectedVideoReached if reason is not None: if not silent: self.to_screen('[download] ' + reason) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 15c6e997f8..9ef31601c9 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -403,7 +403,7 @@ def metadataparser_actions(f): except Exception: raise ValueError('unsupported geo-bypass country or ip-block') - opts.match_filter = match_filter_func(opts.match_filter) + opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter) if opts.download_archive is not None: opts.download_archive = expand_path(opts.download_archive) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index cb807740df..84aeda7f12 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -613,8 +613,16 @@ def _alias_callback(option, opt_str, value, parser, opts, 
nargs): 'Use "--match-filter -" to interactively ask whether to download each video')) selection.add_option( '--no-match-filter', - metavar='FILTER', dest='match_filter', action='store_const', const=None, - help='Do not use generic video filter (default)') + dest='match_filter', action='store_const', const=None, + help='Do not use any --match-filter (default)') + selection.add_option( + '--break-match-filters', + metavar='FILTER', dest='breaking_match_filter', action='append', + help='Same as "--match-filters" but stops the download process when a video is rejected') + selection.add_option( + '--no-break-match-filters', + dest='breaking_match_filter', action='store_const', const=None, + help='Do not use any --break-match-filters (default)') selection.add_option( '--no-playlist', action='store_true', dest='noplaylist', default=False, @@ -646,11 +654,11 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): selection.add_option( '--break-on-reject', action='store_true', dest='break_on_reject', default=False, - help='Stop the download process when encountering a file that has been filtered out') + help=optparse.SUPPRESS_HELP) selection.add_option( '--break-per-input', action='store_true', dest='break_per_url', default=False, - help='Alters --max-downloads, --break-on-existing, --break-on-reject, and autonumber to reset per input URL') + help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL') selection.add_option( '--no-break-per-input', action='store_false', dest='break_per_url', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index bf2eb6b380..e9b8894473 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1230,8 +1230,8 @@ class ExistingVideoReached(DownloadCancelled): class RejectedVideoReached(DownloadCancelled): - """ --break-on-reject triggered """ - msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject' + """ --break-match-filter triggered """ + msg = 
'Encountered a video that did not match filter, stopping due to --break-match-filter' class MaxDownloadsReached(DownloadCancelled): @@ -3911,16 +3911,21 @@ def match_str(filter_str, dct, incomplete=False): for filter_part in re.split(r'(?