Ability to load playlist infojson

* If `--no-clean-infojson` is given, the video IDs are saved to, and loaded from, the infojson along with their playlist index
* If a video entry that was not saved is requested, we fall back to using `webpage_url` to re-extract the entries

Related: https://github.com/yt-dlp/yt-dlp/issues/190#issuecomment-804921024
This commit is contained in:
pukkandan 2021-03-24 01:15:53 +05:30
parent 394dcd4486
commit 498f560638
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
2 changed files with 84 additions and 63 deletions

View File

@ -60,6 +60,7 @@
encode_compat_str, encode_compat_str,
encodeFilename, encodeFilename,
error_to_compat_str, error_to_compat_str,
EntryNotInPlaylist,
ExistingVideoReached, ExistingVideoReached,
expand_path, expand_path,
ExtractorError, ExtractorError,
@ -1180,48 +1181,16 @@ def __process_playlist(self, ie_result, download):
playlist = ie_result.get('title') or ie_result.get('id') playlist = ie_result.get('title') or ie_result.get('id')
self.to_screen('[download] Downloading playlist: %s' % playlist) self.to_screen('[download] Downloading playlist: %s' % playlist)
if self.params.get('allow_playlist_files', True): if 'entries' not in ie_result:
ie_copy = { raise EntryNotInPlaylist()
'playlist': playlist, incomplete_entries = bool(ie_result.get('requested_entries'))
'playlist_id': ie_result.get('id'), if incomplete_entries:
'playlist_title': ie_result.get('title'), def fill_missing_entries(entries, indexes):
'playlist_uploader': ie_result.get('uploader'), ret = [None] * max(*indexes)
'playlist_uploader_id': ie_result.get('uploader_id'), for i, entry in zip(indexes, entries):
'playlist_index': 0 ret[i - 1] = entry
} return ret
ie_copy.update(dict(ie_result)) ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
if self.params.get('writeinfojson', False):
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
if not self._ensure_dir_exists(encodeFilename(infofn)):
return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
self.to_screen('[info] Playlist metadata is already present')
else:
playlist_info = dict(ie_result)
# playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
try:
write_json_file(self.filter_requested_info(playlist_info, self.params.get('clean_infojson', True)), infofn)
except (OSError, IOError):
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
if self.params.get('writedescription', False):
descfn = self.prepare_filename(ie_copy, 'pl_description')
if not self._ensure_dir_exists(encodeFilename(descfn)):
return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Playlist description is already present')
elif ie_result.get('description') is None:
self.report_warning('There\'s no playlist description to write.')
else:
try:
self.to_screen('[info] Writing playlist description to: ' + descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description'])
except (OSError, IOError):
self.report_error('Cannot write playlist description file ' + descfn)
return
playlist_results = [] playlist_results = []
@ -1248,25 +1217,20 @@ def iter_playlistitems(format):
def make_playlistitems_entries(list_ie_entries): def make_playlistitems_entries(list_ie_entries):
num_entries = len(list_ie_entries) num_entries = len(list_ie_entries)
return [ for i in playlistitems:
list_ie_entries[i - 1] for i in playlistitems if -num_entries < i <= num_entries:
if -num_entries <= i - 1 < num_entries] yield list_ie_entries[i - 1]
elif incomplete_entries:
def report_download(num_entries): raise EntryNotInPlaylist()
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, num_entries))
if isinstance(ie_entries, list): if isinstance(ie_entries, list):
n_all_entries = len(ie_entries) n_all_entries = len(ie_entries)
if playlistitems: if playlistitems:
entries = make_playlistitems_entries(ie_entries) entries = list(make_playlistitems_entries(ie_entries))
else: else:
entries = ie_entries[playliststart:playlistend] entries = ie_entries[playliststart:playlistend]
n_entries = len(entries) n_entries = len(entries)
self.to_screen( msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
elif isinstance(ie_entries, PagedList): elif isinstance(ie_entries, PagedList):
if playlistitems: if playlistitems:
entries = [] entries = []
@ -1278,25 +1242,73 @@ def report_download(num_entries):
entries = ie_entries.getslice( entries = ie_entries.getslice(
playliststart, playlistend) playliststart, playlistend)
n_entries = len(entries) n_entries = len(entries)
report_download(n_entries) msg = 'Downloading %d videos' % n_entries
else: # iterable else: # iterable
if playlistitems: if playlistitems:
entries = make_playlistitems_entries(list(itertools.islice( entries = list(make_playlistitems_entries(list(itertools.islice(
ie_entries, 0, max(playlistitems)))) ie_entries, 0, max(playlistitems)))))
else: else:
entries = list(itertools.islice( entries = list(itertools.islice(
ie_entries, playliststart, playlistend)) ie_entries, playliststart, playlistend))
n_entries = len(entries) n_entries = len(entries)
report_download(n_entries) msg = 'Downloading %d videos' % n_entries
if any((entry is None for entry in entries)):
raise EntryNotInPlaylist()
if not playlistitems and (playliststart or playlistend):
playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
ie_result['entries'] = entries
ie_result['requested_entries'] = playlistitems
if self.params.get('allow_playlist_files', True):
ie_copy = {
'playlist': playlist,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': 0
}
ie_copy.update(dict(ie_result))
if self.params.get('writeinfojson', False):
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
if not self._ensure_dir_exists(encodeFilename(infofn)):
return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
self.to_screen('[info] Playlist metadata is already present')
else:
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
try:
write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
except (OSError, IOError):
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
if self.params.get('writedescription', False):
descfn = self.prepare_filename(ie_copy, 'pl_description')
if not self._ensure_dir_exists(encodeFilename(descfn)):
return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Playlist description is already present')
elif ie_result.get('description') is None:
self.report_warning('There\'s no playlist description to write.')
else:
try:
self.to_screen('[info] Writing playlist description to: ' + descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description'])
except (OSError, IOError):
self.report_error('Cannot write playlist description file ' + descfn)
return
if self.params.get('playlistreverse', False): if self.params.get('playlistreverse', False):
entries = entries[::-1] entries = entries[::-1]
if self.params.get('playlistrandom', False): if self.params.get('playlistrandom', False):
random.shuffle(entries) random.shuffle(entries)
x_forwarded_for = ie_result.get('__x_forwarded_for_ip') x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
for i, entry in enumerate(entries, 1): for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# This __x_forwarded_for_ip thing is a bit ugly but requires # This __x_forwarded_for_ip thing is a bit ugly but requires
@ -1310,7 +1322,7 @@ def report_download(num_entries):
'playlist_title': ie_result.get('title'), 'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart, 'playlist_index': playlistitems[i - 1] if playlistitems else i,
'extractor': ie_result['extractor'], 'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']), 'webpage_url_basename': url_basename(ie_result['webpage_url']),
@ -2524,10 +2536,10 @@ def download_with_info_file(self, info_filename):
[info_filename], mode='r', [info_filename], mode='r',
openhook=fileinput.hook_encoded('utf-8'))) as f: openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, we can't call json.load # FileInput doesn't have a read method, we can't call json.load
info = self.filter_requested_info(json.loads('\n'.join(f))) info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
try: try:
self.process_ie_result(info, download=True) self.process_ie_result(info, download=True)
except DownloadError: except (DownloadError, EntryNotInPlaylist):
webpage_url = info.get('webpage_url') webpage_url = info.get('webpage_url')
if webpage_url is not None: if webpage_url is not None:
self.report_warning('The info failed to download, trying with "%s"' % webpage_url) self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
@ -2542,7 +2554,7 @@ def filter_requested_info(info_dict, actually_filter=True):
info_dict['epoch'] = int(time.time()) info_dict['epoch'] = int(time.time())
return info_dict return info_dict
exceptions = { exceptions = {
'remove': ['requested_formats', 'requested_subtitles', 'filepath', 'entries'], 'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
'keep': ['_type'], 'keep': ['_type'],
} }
keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove']) keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])

View File

@ -2423,6 +2423,15 @@ def __init__(self, msg, exc_info=None):
self.exc_info = exc_info self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Signal that a requested playlist entry is unavailable.

    YoutubeDL raises this when an entry that was asked for cannot be
    found in the playlist info_dict.
    """
class SameFileError(YoutubeDLError): class SameFileError(YoutubeDLError):
"""Same File exception. """Same File exception.