From 093a17107ea5e375ba606ed1c31d1c259f93e0df Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 21:41:41 +0530 Subject: [PATCH] Allow using a custom format selector through API Closes #1619, #1464 --- README.md | 51 ++++++++++++++++++++++++++++++++++++++------- yt_dlp/YoutubeDL.py | 13 +++++++++--- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 24975ad6ff..7a4ec55bb2 100644 --- a/README.md +++ b/README.md @@ -1600,14 +1600,14 @@ # EMBEDDING YT-DLP ```python from yt_dlp import YoutubeDL -ydl_opts = {} +ydl_opts = {'format': 'bestaudio'} with YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L154-L452). -Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), converts the video to an mp3 file, implements a custom postprocessor and prints the final info_dict as json: +Here's a more complete example demonstrating various functionality: ```python import json @@ -1633,23 +1633,56 @@ # EMBEDDING YT-DLP print(msg) +# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor class MyCustomPP(yt_dlp.postprocessor.PostProcessor): + # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run def run(self, info): self.to_screen('Doing stuff') return [], info +# ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL def my_hook(d): if d['status'] == 'finished': print('Done downloading, now converting ...') +def format_selector(ctx): + """ Select the best video and the best audio that won't result in an mkv. 
+ This is just an example and does not handle all cases """ + + # formats are already sorted worst to best + formats = ctx.get('formats')[::-1] + + # acodec='none' means there is no audio + best_video = next(f for f in formats + if f['vcodec'] != 'none' and f['acodec'] == 'none') + + # find compatible audio extension + audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']] + # vcodec='none' means there is no video + best_audio = next(f for f in formats if ( + f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext)) + + yield { + # These are the minimum required fields for a merged format + 'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}', + 'ext': best_video['ext'], + 'requested_formats': [best_video, best_audio], + # Must be + separated list of protocols + 'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}' + } + + +# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options ydl_opts = { - 'format': 'bestaudio/best', + 'format': format_selector, 'postprocessors': [{ - 'key': 'FFmpegExtractAudio', - 'preferredcodec': 'mp3', - 'preferredquality': '192', + # Embed metadata in video using ffmpeg. + # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts + 'key': 'FFmpegMetadata', + 'add_chapters': True, + 'add_metadata': True, }], 'logger': MyLogger(), 'progress_hooks': [my_hook], @@ -1659,14 +1692,16 @@ # EMBEDDING YT-DLP # Add custom headers yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'}) +# ℹ️ See the public functions in yt_dlp.YoutubeDL for other available functions. 
+# Eg: "ydl.download", "ydl.download_with_info_file" with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.add_post_processor(MyCustomPP()) info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') + + # ℹ️ ydl.sanitize_info makes the info json-serializable print(json.dumps(ydl.sanitize_info(info))) ``` -See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file` - **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2439fc82bd..5d6b1d5b2a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -211,6 +211,9 @@ class YoutubeDL(object): simulate: Do not download the video files. If unset (or None), simulate only if listsubtitles, listformats or list_thumbnails is used format: Video format code. see "FORMAT SELECTION" for more details. + You can also pass a function. The function takes 'ctx' as + argument and returns the formats to download. + See "build_format_selector" for an implementation allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. ignore_no_formats_error: Ignore "No video formats" error. 
Usefull for extracting metadata even if the video is not actually @@ -613,6 +616,7 @@ def check_deprecated(param, option, suggestion): # Creating format selector here allows us to catch syntax errors before the extraction self.format_selector = ( None if self.params.get('format') is None + else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) self._setup_opener() @@ -1927,9 +1931,9 @@ def _merge(formats_pair): 'format_id': '+'.join(filtered('format_id')), 'ext': output_ext, 'protocol': '+'.join(map(determine_protocol, formats_info)), - 'language': '+'.join(orderedSet(filtered('language'))), - 'format_note': '+'.join(orderedSet(filtered('format_note'))), - 'filesize_approx': sum(filtered('filesize', 'filesize_approx')), + 'language': '+'.join(orderedSet(filtered('language'))) or None, + 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None, + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None, 'tbr': sum(filtered('tbr', 'vbr', 'abr')), } @@ -2357,6 +2361,9 @@ def is_wellformed(f): info_dict, _ = self.pre_process(info_dict) + # The pre-processors may have modified the formats + formats = info_dict.get('formats', [info_dict]) + if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) if self.params.get('listformats'):