mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 14:14:54 +01:00
Ability to load playlist infojson
* If `--no-clean-infojson` is given, the video IDs are saved to/loaded from the infojson along with their playlist index * If a video entry that was not saved is requested, we fall back to using `webpage_url` to re-extract the entries Related: https://github.com/yt-dlp/yt-dlp/issues/190#issuecomment-804921024
This commit is contained in:
parent
394dcd4486
commit
498f560638
|
@ -60,6 +60,7 @@
|
||||||
encode_compat_str,
|
encode_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
|
EntryNotInPlaylist,
|
||||||
ExistingVideoReached,
|
ExistingVideoReached,
|
||||||
expand_path,
|
expand_path,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -1180,48 +1181,16 @@ def __process_playlist(self, ie_result, download):
|
||||||
playlist = ie_result.get('title') or ie_result.get('id')
|
playlist = ie_result.get('title') or ie_result.get('id')
|
||||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||||
|
|
||||||
if self.params.get('allow_playlist_files', True):
|
if 'entries' not in ie_result:
|
||||||
ie_copy = {
|
raise EntryNotInPlaylist()
|
||||||
'playlist': playlist,
|
incomplete_entries = bool(ie_result.get('requested_entries'))
|
||||||
'playlist_id': ie_result.get('id'),
|
if incomplete_entries:
|
||||||
'playlist_title': ie_result.get('title'),
|
def fill_missing_entries(entries, indexes):
|
||||||
'playlist_uploader': ie_result.get('uploader'),
|
ret = [None] * max(*indexes)
|
||||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
for i, entry in zip(indexes, entries):
|
||||||
'playlist_index': 0
|
ret[i - 1] = entry
|
||||||
}
|
return ret
|
||||||
ie_copy.update(dict(ie_result))
|
ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
|
||||||
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
|
|
||||||
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
|
||||||
return
|
|
||||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
|
|
||||||
self.to_screen('[info] Playlist metadata is already present')
|
|
||||||
else:
|
|
||||||
playlist_info = dict(ie_result)
|
|
||||||
# playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
|
|
||||||
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
|
|
||||||
try:
|
|
||||||
write_json_file(self.filter_requested_info(playlist_info, self.params.get('clean_infojson', True)), infofn)
|
|
||||||
except (OSError, IOError):
|
|
||||||
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
|
|
||||||
|
|
||||||
if self.params.get('writedescription', False):
|
|
||||||
descfn = self.prepare_filename(ie_copy, 'pl_description')
|
|
||||||
if not self._ensure_dir_exists(encodeFilename(descfn)):
|
|
||||||
return
|
|
||||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
|
|
||||||
self.to_screen('[info] Playlist description is already present')
|
|
||||||
elif ie_result.get('description') is None:
|
|
||||||
self.report_warning('There\'s no playlist description to write.')
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
self.to_screen('[info] Writing playlist description to: ' + descfn)
|
|
||||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
|
||||||
descfile.write(ie_result['description'])
|
|
||||||
except (OSError, IOError):
|
|
||||||
self.report_error('Cannot write playlist description file ' + descfn)
|
|
||||||
return
|
|
||||||
|
|
||||||
playlist_results = []
|
playlist_results = []
|
||||||
|
|
||||||
|
@ -1248,25 +1217,20 @@ def iter_playlistitems(format):
|
||||||
|
|
||||||
def make_playlistitems_entries(list_ie_entries):
|
def make_playlistitems_entries(list_ie_entries):
|
||||||
num_entries = len(list_ie_entries)
|
num_entries = len(list_ie_entries)
|
||||||
return [
|
for i in playlistitems:
|
||||||
list_ie_entries[i - 1] for i in playlistitems
|
if -num_entries < i <= num_entries:
|
||||||
if -num_entries <= i - 1 < num_entries]
|
yield list_ie_entries[i - 1]
|
||||||
|
elif incomplete_entries:
|
||||||
def report_download(num_entries):
|
raise EntryNotInPlaylist()
|
||||||
self.to_screen(
|
|
||||||
'[%s] playlist %s: Downloading %d videos' %
|
|
||||||
(ie_result['extractor'], playlist, num_entries))
|
|
||||||
|
|
||||||
if isinstance(ie_entries, list):
|
if isinstance(ie_entries, list):
|
||||||
n_all_entries = len(ie_entries)
|
n_all_entries = len(ie_entries)
|
||||||
if playlistitems:
|
if playlistitems:
|
||||||
entries = make_playlistitems_entries(ie_entries)
|
entries = list(make_playlistitems_entries(ie_entries))
|
||||||
else:
|
else:
|
||||||
entries = ie_entries[playliststart:playlistend]
|
entries = ie_entries[playliststart:playlistend]
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
self.to_screen(
|
msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
|
||||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
|
||||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
|
||||||
elif isinstance(ie_entries, PagedList):
|
elif isinstance(ie_entries, PagedList):
|
||||||
if playlistitems:
|
if playlistitems:
|
||||||
entries = []
|
entries = []
|
||||||
|
@ -1278,25 +1242,73 @@ def report_download(num_entries):
|
||||||
entries = ie_entries.getslice(
|
entries = ie_entries.getslice(
|
||||||
playliststart, playlistend)
|
playliststart, playlistend)
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
report_download(n_entries)
|
msg = 'Downloading %d videos' % n_entries
|
||||||
else: # iterable
|
else: # iterable
|
||||||
if playlistitems:
|
if playlistitems:
|
||||||
entries = make_playlistitems_entries(list(itertools.islice(
|
entries = list(make_playlistitems_entries(list(itertools.islice(
|
||||||
ie_entries, 0, max(playlistitems))))
|
ie_entries, 0, max(playlistitems)))))
|
||||||
else:
|
else:
|
||||||
entries = list(itertools.islice(
|
entries = list(itertools.islice(
|
||||||
ie_entries, playliststart, playlistend))
|
ie_entries, playliststart, playlistend))
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
report_download(n_entries)
|
msg = 'Downloading %d videos' % n_entries
|
||||||
|
|
||||||
|
if any((entry is None for entry in entries)):
|
||||||
|
raise EntryNotInPlaylist()
|
||||||
|
if not playlistitems and (playliststart or playlistend):
|
||||||
|
playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
|
||||||
|
ie_result['entries'] = entries
|
||||||
|
ie_result['requested_entries'] = playlistitems
|
||||||
|
|
||||||
|
if self.params.get('allow_playlist_files', True):
|
||||||
|
ie_copy = {
|
||||||
|
'playlist': playlist,
|
||||||
|
'playlist_id': ie_result.get('id'),
|
||||||
|
'playlist_title': ie_result.get('title'),
|
||||||
|
'playlist_uploader': ie_result.get('uploader'),
|
||||||
|
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||||
|
'playlist_index': 0
|
||||||
|
}
|
||||||
|
ie_copy.update(dict(ie_result))
|
||||||
|
|
||||||
|
if self.params.get('writeinfojson', False):
|
||||||
|
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
|
||||||
|
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
||||||
|
return
|
||||||
|
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
|
||||||
|
self.to_screen('[info] Playlist metadata is already present')
|
||||||
|
else:
|
||||||
|
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
|
||||||
|
try:
|
||||||
|
write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
|
||||||
|
|
||||||
|
if self.params.get('writedescription', False):
|
||||||
|
descfn = self.prepare_filename(ie_copy, 'pl_description')
|
||||||
|
if not self._ensure_dir_exists(encodeFilename(descfn)):
|
||||||
|
return
|
||||||
|
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
|
||||||
|
self.to_screen('[info] Playlist description is already present')
|
||||||
|
elif ie_result.get('description') is None:
|
||||||
|
self.report_warning('There\'s no playlist description to write.')
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
self.to_screen('[info] Writing playlist description to: ' + descfn)
|
||||||
|
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||||
|
descfile.write(ie_result['description'])
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error('Cannot write playlist description file ' + descfn)
|
||||||
|
return
|
||||||
|
|
||||||
if self.params.get('playlistreverse', False):
|
if self.params.get('playlistreverse', False):
|
||||||
entries = entries[::-1]
|
entries = entries[::-1]
|
||||||
|
|
||||||
if self.params.get('playlistrandom', False):
|
if self.params.get('playlistrandom', False):
|
||||||
random.shuffle(entries)
|
random.shuffle(entries)
|
||||||
|
|
||||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||||
|
|
||||||
|
self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
|
||||||
for i, entry in enumerate(entries, 1):
|
for i, entry in enumerate(entries, 1):
|
||||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||||
|
@ -1310,7 +1322,7 @@ def report_download(num_entries):
|
||||||
'playlist_title': ie_result.get('title'),
|
'playlist_title': ie_result.get('title'),
|
||||||
'playlist_uploader': ie_result.get('uploader'),
|
'playlist_uploader': ie_result.get('uploader'),
|
||||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
'playlist_index': playlistitems[i - 1] if playlistitems else i,
|
||||||
'extractor': ie_result['extractor'],
|
'extractor': ie_result['extractor'],
|
||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
|
@ -2524,10 +2536,10 @@ def download_with_info_file(self, info_filename):
|
||||||
[info_filename], mode='r',
|
[info_filename], mode='r',
|
||||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
||||||
# FileInput doesn't have a read method, we can't call json.load
|
# FileInput doesn't have a read method, we can't call json.load
|
||||||
info = self.filter_requested_info(json.loads('\n'.join(f)))
|
info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
|
||||||
try:
|
try:
|
||||||
self.process_ie_result(info, download=True)
|
self.process_ie_result(info, download=True)
|
||||||
except DownloadError:
|
except (DownloadError, EntryNotInPlaylist):
|
||||||
webpage_url = info.get('webpage_url')
|
webpage_url = info.get('webpage_url')
|
||||||
if webpage_url is not None:
|
if webpage_url is not None:
|
||||||
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
|
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
|
||||||
|
@ -2542,7 +2554,7 @@ def filter_requested_info(info_dict, actually_filter=True):
|
||||||
info_dict['epoch'] = int(time.time())
|
info_dict['epoch'] = int(time.time())
|
||||||
return info_dict
|
return info_dict
|
||||||
exceptions = {
|
exceptions = {
|
||||||
'remove': ['requested_formats', 'requested_subtitles', 'filepath', 'entries'],
|
'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
|
||||||
'keep': ['_type'],
|
'keep': ['_type'],
|
||||||
}
|
}
|
||||||
keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
|
keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
|
||||||
|
|
|
@ -2423,6 +2423,15 @@ def __init__(self, msg, exc_info=None):
|
||||||
self.exc_info = exc_info
|
self.exc_info = exc_info
|
||||||
|
|
||||||
|
|
||||||
|
class EntryNotInPlaylist(YoutubeDLError):
    """Signal that a requested playlist entry is unavailable.

    YoutubeDL raises this when an entry that was asked for cannot be
    found in the playlist info_dict.
    """
|
||||||
|
|
||||||
|
|
||||||
class SameFileError(YoutubeDLError):
|
class SameFileError(YoutubeDLError):
|
||||||
"""Same File exception.
|
"""Same File exception.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user