From 8fa43c73d83619722c7e30d70247eaa9b7f52810 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 11 Apr 2021 03:48:52 +0530 Subject: [PATCH] Add option `--convert-thumbnails` Closes: https://github.com/yt-dlp/yt-dlp/issues/99 https://github.com/yt-dlp/yt-dlp/issues/102 --- README.md | 38 +++++++------- yt_dlp/__init__.py | 10 ++++ yt_dlp/options.py | 13 +++-- yt_dlp/postprocessor/__init__.py | 2 + yt_dlp/postprocessor/embedthumbnail.py | 51 +++++-------------- yt_dlp/postprocessor/ffmpeg.py | 70 ++++++++++++++++++++++++++ 6 files changed, 124 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index a7832508b8..6bf4625a2c 100644 --- a/README.md +++ b/README.md @@ -639,24 +639,24 @@ ## Post-Processing Options: Specify the postprocessor/executable name and the arguments separated by a colon ":" to give the argument to the specified - postprocessor/executable. Supported - postprocessors are: SponSkrub, - ExtractAudio, VideoRemuxer, VideoConvertor, - EmbedSubtitle, Metadata, Merger, - FixupStretched, FixupM4a, FixupM3u8, - SubtitlesConvertor, EmbedThumbnail and - SplitChapters. The supported executables - are: SponSkrub, FFmpeg, FFprobe, and - AtomicParsley. You can also specify - "PP+EXE:ARGS" to give the arguments to the - specified executable only when being used - by the specified postprocessor. - Additionally, for ffmpeg/ffprobe, "_i"/"_o" - can be appended to the prefix optionally - followed by a number to pass the argument - before the specified input/output file. Eg: - --ppa "Merger+ffmpeg_i1:-v quiet". You can - use this option multiple times to give + postprocessor/executable. Supported PP are: + Merger, ExtractAudio, SplitChapters, + Metadata, EmbedSubtitle, EmbedThumbnail, + SubtitlesConvertor, ThumbnailsConvertor, + VideoRemuxer, VideoConvertor, SponSkrub, + FixupStretched, FixupM4a and FixupM3u8. The + supported executables are: AtomicParsley, + FFmpeg, FFprobe, and SponSkrub. 
You can + also specify "PP+EXE:ARGS" to give the + arguments to the specified executable only + when being used by the specified + postprocessor. Additionally, for + ffmpeg/ffprobe, "_i"/"_o" can be appended + to the prefix optionally followed by a + number to pass the argument before the + specified input/output file. Eg: --ppa + "Merger+ffmpeg_i1:-v quiet". You can use + this option multiple times to give different arguments to different postprocessors. (Alias: --ppa) -k, --keep-video Keep the intermediate video file on disk @@ -697,6 +697,8 @@ ## Post-Processing Options: --convert-subs FORMAT Convert the subtitles to another format (currently supported: srt|ass|vtt|lrc) (Alias: --convert-subtitles) + --convert-thumbnails FORMAT Convert the thumbnails to another format + (currently supported: jpg) --split-chapters Split video into multiple files based on internal chapters. The "chapter:" prefix can be used with "--paths" and "--output" diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index efb852891e..90a3116ea4 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -230,6 +230,9 @@ def parse_retries(retries, name=''): if opts.convertsubtitles is not None: if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'): parser.error('invalid subtitle format specified') + if opts.convertthumbnails is not None: + if opts.convertthumbnails not in ('jpg', ): + parser.error('invalid thumbnail format specified') if opts.date is not None: date = DateRange.day(opts.date) @@ -332,6 +335,13 @@ def report_conflict(arg1, arg2): # Run this before the actual video download 'when': 'before_dl' }) + if opts.convertthumbnails: + postprocessors.append({ + 'key': 'FFmpegThumbnailsConvertor', + 'format': opts.convertthumbnails, + # Run this before the actual video download + 'when': 'before_dl' + }) if opts.extractaudio: postprocessors.append({ 'key': 'FFmpegExtractAudio', diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c4cb57e2fa..574af0a543 100644 --- 
a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1109,10 +1109,11 @@ def _dict_from_multiple_values_options_callback( help=( 'Give these arguments to the postprocessors. ' 'Specify the postprocessor/executable name and the arguments separated by a colon ":" ' - 'to give the argument to the specified postprocessor/executable. Supported postprocessors are: ' - 'SponSkrub, ExtractAudio, VideoRemuxer, VideoConvertor, EmbedSubtitle, Metadata, Merger, ' - 'FixupStretched, FixupM4a, FixupM3u8, SubtitlesConvertor, EmbedThumbnail and SplitChapters. ' - 'The supported executables are: SponSkrub, FFmpeg, FFprobe, and AtomicParsley. ' + 'to give the argument to the specified postprocessor/executable. Supported PP are: ' + 'Merger, ExtractAudio, SplitChapters, Metadata, EmbedSubtitle, EmbedThumbnail, ' + 'SubtitlesConvertor, ThumbnailsConvertor, VideoRemuxer, VideoConvertor, ' + 'SponSkrub, FixupStretched, FixupM4a and FixupM3u8. ' + 'The supported executables are: AtomicParsley, FFmpeg, FFprobe, and SponSkrub. ' 'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable ' 'only when being used by the specified postprocessor. 
Additionally, for ffmpeg/ffprobe, ' '"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument ' @@ -1204,6 +1205,10 @@ def _dict_from_multiple_values_options_callback( '--convert-subs', '--convert-sub', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, help='Convert the subtitles to another format (currently supported: srt|ass|vtt|lrc) (Alias: --convert-subtitles)') + postproc.add_option( + '--convert-thumbnails', + metavar='FORMAT', dest='convertthumbnails', default=None, + help='Convert the thumbnails to another format (currently supported: jpg)') postproc.add_option( '--split-chapters', '--split-tracks', dest='split_chapters', action='store_true', default=False, diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index 5c0679815d..fe69c2c728 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -13,6 +13,7 @@ FFmpegVideoConvertorPP, FFmpegVideoRemuxerPP, FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, FFmpegSplitChaptersPP, ) from .xattrpp import XAttrMetadataPP @@ -40,6 +41,7 @@ def get_postprocessor(key): 'FFmpegMetadataPP', 'FFmpegPostProcessor', 'FFmpegSubtitlesConvertorPP', + 'FFmpegThumbnailsConvertorPP', 'FFmpegVideoConvertorPP', 'FFmpegVideoRemuxerPP', 'MetadataFromFieldPP', diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 25124161a7..3be698bce7 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -13,8 +13,10 @@ except ImportError: has_mutagen = False -from .ffmpeg import FFmpegPostProcessor - +from .ffmpeg import ( + FFmpegPostProcessor, + FFmpegThumbnailsConvertorPP, +) from ..utils import ( check_executable, encodeArgument, @@ -23,7 +25,6 @@ PostProcessingError, prepend_extension, process_communicate_or_kill, - replace_extension, shell_quote, ) @@ -35,7 +36,7 @@ class EmbedThumbnailPPError(PostProcessingError): class 
EmbedThumbnailPP(FFmpegPostProcessor): def __init__(self, downloader=None, already_have_thumbnail=False): - super(EmbedThumbnailPP, self).__init__(downloader) + FFmpegPostProcessor.__init__(self, downloader) self._already_have_thumbnail = already_have_thumbnail def run(self, info): @@ -46,44 +47,21 @@ def run(self, info): self.to_screen('There aren\'t any thumbnails to embed') return [], info - initial_thumbnail = original_thumbnail = thumbnail_filename = info['thumbnails'][-1]['filepath'] - + thumbnail_filename = info['thumbnails'][-1]['filepath'] if not os.path.exists(encodeFilename(thumbnail_filename)): self.report_warning('Skipping embedding the thumbnail because the file is missing.') return [], info - def is_webp(path): - with open(encodeFilename(path), 'rb') as f: - b = f.read(12) - return b[0:4] == b'RIFF' and b[8:] == b'WEBP' - # Correct extension for WebP file with wrong extension (see #25687, #25717) - _, thumbnail_ext = os.path.splitext(thumbnail_filename) - if thumbnail_ext: - thumbnail_ext = thumbnail_ext[1:].lower() - if thumbnail_ext != 'webp' and is_webp(thumbnail_filename): - self.to_screen('Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename) - thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp') - if os.path.exists(thumbnail_webp_filename): - os.remove(thumbnail_webp_filename) - os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename)) - original_thumbnail = thumbnail_filename = thumbnail_webp_filename - thumbnail_ext = 'webp' + convertor = FFmpegThumbnailsConvertorPP(self._downloader) + convertor.fixup_webp(info, -1) + + original_thumbnail = thumbnail_filename = info['thumbnails'][-1]['filepath'] # Convert unsupported thumbnail formats to JPEG (see #25687, #25717) - if thumbnail_ext not in ['jpg', 'png']: - # NB: % is supposed to be escaped with %% but this does not work - # for input files so working around with standard substitution - 
escaped_thumbnail_filename = thumbnail_filename.replace('%', '#') - os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename)) - escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg') - self.to_screen('Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename) - self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg']) - thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg') - # Rename back to unescaped for further processing - os.rename(encodeFilename(escaped_thumbnail_filename), encodeFilename(thumbnail_filename)) - os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename)) - thumbnail_filename = thumbnail_jpg_filename + thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:].lower() + if thumbnail_ext not in ('jpg', 'png'): + thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'jpg') thumbnail_ext = 'jpg' mtime = os.stat(encodeFilename(filename)).st_mtime @@ -194,9 +172,6 @@ def is_webp(path): files_to_delete = [thumbnail_filename] if self._already_have_thumbnail: - info['__files_to_move'][original_thumbnail] = replace_extension( - info['__files_to_move'][initial_thumbnail], - os.path.splitext(original_thumbnail)[1][1:]) if original_thumbnail == thumbnail_filename: files_to_delete = [] elif original_thumbnail != thumbnail_filename: diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index accd715bed..0e160f5dcd 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -816,3 +816,73 @@ def run(self, info): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) self.real_run_ffmpeg([(info['filepath'], opts)], [(destination, ['-c', 'copy'])]) return [], info + + +class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): + def __init__(self, downloader=None, format=None): + super(FFmpegThumbnailsConvertorPP,
self).__init__(downloader) + self.format = format + + @staticmethod + def is_webp(path): + with open(encodeFilename(path), 'rb') as f: + b = f.read(12) + return b[0:4] == b'RIFF' and b[8:] == b'WEBP' + + def fixup_webp(self, info, idx=-1): + thumbnail_filename = info['thumbnails'][idx]['filepath'] + _, thumbnail_ext = os.path.splitext(thumbnail_filename) + if thumbnail_ext: + thumbnail_ext = thumbnail_ext[1:].lower() + if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename): + self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename) + webp_filename = replace_extension(thumbnail_filename, 'webp') + if os.path.exists(webp_filename): + os.remove(webp_filename) + os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_filename)) + info['thumbnails'][idx]['filepath'] = webp_filename + info['__files_to_move'][webp_filename] = replace_extension( + info['__files_to_move'].pop(thumbnail_filename), 'webp') + + def convert_thumbnail(self, thumbnail_filename, ext): + if ext != 'jpg': + raise FFmpegPostProcessorError('Only conversion to jpg is currently supported') + # NB: % is supposed to be escaped with %% but this does not work + # for input files so working around with standard substitution + escaped_thumbnail_filename = thumbnail_filename.replace('%', '#') + os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename)) + escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg') + self.to_screen('Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename) + self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg']) + thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg') + # Rename back to unescaped + os.rename(encodeFilename(escaped_thumbnail_filename), encodeFilename(thumbnail_filename)) + os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename)) + return 
thumbnail_jpg_filename + + def run(self, info): + if self.format != 'jpg': + raise FFmpegPostProcessorError('Only conversion to jpg is currently supported') + files_to_delete = [] + has_thumbnail = False + + for idx, thumbnail_dict in enumerate(info['thumbnails']): + if 'filepath' not in thumbnail_dict: + continue + has_thumbnail = True + self.fixup_webp(info, idx) + original_thumbnail = thumbnail_dict['filepath'] + _, thumbnail_ext = os.path.splitext(original_thumbnail) + if thumbnail_ext: + thumbnail_ext = thumbnail_ext[1:].lower() + if thumbnail_ext == self.format: + self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail) + continue + thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format) + files_to_delete.append(original_thumbnail) + info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension( + info['__files_to_move'][original_thumbnail], self.format) + + if not has_thumbnail: + self.to_screen('There aren\'t any thumbnails to convert') + return files_to_delete, info