mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-23 11:31:29 +01:00
Added option --break-on-reject
and modified `--break-on-existing`
This commit is contained in:
parent
90505ff153
commit
8b0d7497d5
|
@ -250,8 +250,10 @@ ## Video Selection:
|
||||||
--download-archive FILE Download only videos not listed in the
|
--download-archive FILE Download only videos not listed in the
|
||||||
archive file. Record the IDs of all
|
archive file. Record the IDs of all
|
||||||
downloaded videos in it.
|
downloaded videos in it.
|
||||||
--break-on-existing Stop the download process after attempting
|
--break-on-existing Stop the download process when encountering
|
||||||
to download a file that's in the archive.
|
a file that's in the archive.
|
||||||
|
--break-on-reject Stop the download process when encountering
|
||||||
|
a file that has been filtered out.
|
||||||
--no-download-archive Do not use archive file (default)
|
--no-download-archive Do not use archive file (default)
|
||||||
--include-ads Download advertisements as well
|
--include-ads Download advertisements as well
|
||||||
(experimental)
|
(experimental)
|
||||||
|
|
|
@ -58,6 +58,7 @@
|
||||||
encode_compat_str,
|
encode_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
|
ExistingVideoReached,
|
||||||
expand_path,
|
expand_path,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
@ -81,6 +82,7 @@
|
||||||
register_socks_protocols,
|
register_socks_protocols,
|
||||||
render_table,
|
render_table,
|
||||||
replace_extension,
|
replace_extension,
|
||||||
|
RejectedVideoReached,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
sanitize_path,
|
sanitize_path,
|
||||||
|
@ -232,6 +234,7 @@ class YoutubeDL(object):
|
||||||
again.
|
again.
|
||||||
break_on_existing: Stop the download process after attempting to download a file that's
|
break_on_existing: Stop the download process after attempting to download a file that's
|
||||||
in the archive.
|
in the archive.
|
||||||
|
break_on_reject: Stop the download process when encountering a video that has been filtered out.
|
||||||
cookiefile: File name where cookies should be read from and dumped to.
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
nocheckcertificate:Do not verify SSL certificates
|
nocheckcertificate:Do not verify SSL certificates
|
||||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||||
|
@ -797,44 +800,53 @@ def prepare_filename(self, info_dict):
|
||||||
def _match_entry(self, info_dict, incomplete):
    """ Returns None if the file should be downloaded

    Otherwise returns a string describing why the video is skipped; that
    reason is also printed through self.to_screen().

    info_dict  -- metadata dictionary of the video being considered
    incomplete -- when true, the user supplied 'match_filter' param is
                  not consulted

    Raises ExistingVideoReached when the video is in the download archive
    and the 'break_on_existing' param is set. Otherwise raises
    RejectedVideoReached when the video is skipped for any reason and the
    'break_on_reject' param is set.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        """ Apply the title, upload date, view count and age limit filters """
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        return None

    # Evaluation order is: built-in filters, then the download archive,
    # then the user supplied match_filter. Later checks only run when the
    # earlier ones did not reject the video.
    reason = check_filter()

    # Remember an archive hit as a flag instead of recognising it from the
    # message text: the message says "in archive", so a test for the suffix
    # "in the archive" could never succeed and break_on_existing was ignored.
    in_archive = reason is None and bool(self.in_download_archive(info_dict))
    if in_archive:
        reason = '%s has already been recorded in archive' % video_title
    elif reason is None and not incomplete:
        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            reason = match_filter(info_dict)

    if reason is not None:
        self.to_screen('[download] ' + reason)
        if in_archive and self.params.get('break_on_existing'):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject'):
            # Also reached for an archived video when only break_on_reject is set
            raise RejectedVideoReached()
    return reason
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_extra_info(info_dict, extra_info):
|
def add_extra_info(info_dict, extra_info):
|
||||||
|
@ -895,7 +907,7 @@ def wrapper(self, *args, **kwargs):
|
||||||
self.report_error(msg)
|
self.report_error(msg)
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
self.report_error(compat_str(e), e.format_traceback())
|
||||||
except MaxDownloadsReached:
|
except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.params.get('ignoreerrors', False):
|
if self.params.get('ignoreerrors', False):
|
||||||
|
@ -1098,14 +1110,7 @@ def report_download(num_entries):
|
||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
}
|
}
|
||||||
|
|
||||||
reason = self._match_entry(entry, incomplete=True)
|
self._match_entry(entry, incomplete=True)
|
||||||
if reason is not None:
|
|
||||||
if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
|
|
||||||
print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
self.to_screen('[download] ' + reason)
|
|
||||||
continue
|
|
||||||
|
|
||||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||||
# TODO: skip failed (empty) entries?
|
# TODO: skip failed (empty) entries?
|
||||||
|
@ -1870,9 +1875,7 @@ def process_info(self, info_dict):
|
||||||
if 'format' not in info_dict:
|
if 'format' not in info_dict:
|
||||||
info_dict['format'] = info_dict['ext']
|
info_dict['format'] = info_dict['ext']
|
||||||
|
|
||||||
reason = self._match_entry(info_dict, incomplete=False)
|
if self._match_entry(info_dict, incomplete=False) is not None:
|
||||||
if reason is not None:
|
|
||||||
self.to_screen('[download] ' + reason)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
self._num_downloads += 1
|
self._num_downloads += 1
|
||||||
|
@ -2260,7 +2263,13 @@ def download(self, url_list):
|
||||||
except UnavailableVideoError:
|
except UnavailableVideoError:
|
||||||
self.report_error('unable to download video')
|
self.report_error('unable to download video')
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
self.to_screen('[info] Maximum number of downloaded files reached.')
|
self.to_screen('[info] Maximum number of downloaded files reached')
|
||||||
|
raise
|
||||||
|
except ExistingVideoReached:
|
||||||
|
self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
|
||||||
|
raise
|
||||||
|
except RejectedVideoReached:
|
||||||
|
self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
if self.params.get('dump_single_json', False):
|
if self.params.get('dump_single_json', False):
|
||||||
|
|
|
@ -26,11 +26,13 @@
|
||||||
decodeOption,
|
decodeOption,
|
||||||
DEFAULT_OUTTMPL,
|
DEFAULT_OUTTMPL,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
|
ExistingVideoReached,
|
||||||
expand_path,
|
expand_path,
|
||||||
match_filter_func,
|
match_filter_func,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
read_batch_urls,
|
read_batch_urls,
|
||||||
|
RejectedVideoReached,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
setproctitle,
|
setproctitle,
|
||||||
std_headers,
|
std_headers,
|
||||||
|
@ -449,6 +451,7 @@ def parse_retries(retries):
|
||||||
'age_limit': opts.age_limit,
|
'age_limit': opts.age_limit,
|
||||||
'download_archive': download_archive_fn,
|
'download_archive': download_archive_fn,
|
||||||
'break_on_existing': opts.break_on_existing,
|
'break_on_existing': opts.break_on_existing,
|
||||||
|
'break_on_reject': opts.break_on_reject,
|
||||||
'cookiefile': opts.cookiefile,
|
'cookiefile': opts.cookiefile,
|
||||||
'nocheckcertificate': opts.no_check_certificate,
|
'nocheckcertificate': opts.no_check_certificate,
|
||||||
'prefer_insecure': opts.prefer_insecure,
|
'prefer_insecure': opts.prefer_insecure,
|
||||||
|
@ -519,8 +522,8 @@ def parse_retries(retries):
|
||||||
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
|
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
|
||||||
else:
|
else:
|
||||||
retcode = ydl.download(all_urls)
|
retcode = ydl.download(all_urls)
|
||||||
except MaxDownloadsReached:
|
except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
|
||||||
ydl.to_screen('--max-download limit reached, aborting.')
|
ydl.to_screen('Aborting remaining downloads')
|
||||||
retcode = 101
|
retcode = 101
|
||||||
|
|
||||||
sys.exit(retcode)
|
sys.exit(retcode)
|
||||||
|
|
|
@ -367,7 +367,11 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--break-on-existing',
|
'--break-on-existing',
|
||||||
action='store_true', dest='break_on_existing', default=False,
|
action='store_true', dest='break_on_existing', default=False,
|
||||||
help="Stop the download process after attempting to download a file that's in the archive.")
|
help="Stop the download process when encountering a file that's in the archive.")
|
||||||
|
selection.add_option(
|
||||||
|
'--break-on-reject',
|
||||||
|
action='store_true', dest='break_on_reject', default=False,
|
||||||
|
help="Stop the download process when encountering a file that has been filtered out.")
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--no-download-archive',
|
'--no-download-archive',
|
||||||
dest='download_archive', action="store_const", const=None,
|
dest='download_archive', action="store_const", const=None,
|
||||||
|
|
|
@ -2433,6 +2433,16 @@ def __init__(self, msg):
|
||||||
self.msg = msg
|
self.msg = msg
|
||||||
|
|
||||||
|
|
||||||
|
class ExistingVideoReached(YoutubeDLError):
    """ --break-on-existing triggered: a video already in the download archive was encountered. """
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class RejectedVideoReached(YoutubeDLError):
    """ --break-on-reject triggered: a video that has been filtered out was encountered. """
    pass
|
||||||
|
|
||||||
|
|
||||||
class MaxDownloadsReached(YoutubeDLError):
|
class MaxDownloadsReached(YoutubeDLError):
|
||||||
""" --max-downloads limit has been reached. """
|
""" --max-downloads limit has been reached. """
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Reference in New Issue
Block a user