From 3d3bb1688bfc5373105e6bf7c3d4729cf3f78788 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 17 Apr 2022 23:19:53 +0530 Subject: [PATCH] [docs] Improve embedding docs and other minor fixes --- CONTRIBUTING.md | 10 +-- README.md | 147 +++++++++++++++++++++++---------- yt_dlp/__init__.py | 5 +- yt_dlp/extractor/kakao.py | 1 + yt_dlp/postprocessor/common.py | 3 +- 5 files changed, 116 insertions(+), 50 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eff6becacb..19888cff40 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -374,21 +374,21 @@ ### Provide fallbacks #### Example -Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like: +Say `meta` from the previous example has a `title` and you are about to extract it like: ```python -title = meta['title'] +title = meta.get('title') ``` -If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. +If `title` disappears from `meta` in future due to some changes on the hoster's side the title extraction would fail. -Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: +Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback like: ```python title = meta.get('title') or self._og_search_title(webpage) ``` -This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`. +This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`, making the extractor more robust. ### Regular expressions diff --git a/README.md b/README.md index 8a8477c9bd..197d7b49b3 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,7 @@ ### Differences in default behavior * youtube-dl tries to remove some superfluous punctuations from filenames. While this can sometimes be helpfull, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior For ease of use, a few more compat options are available: + * `--compat-options all`: Use all compat options * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` @@ -166,7 +167,7 @@ ### Using the release binary [![Linux](https://img.shields.io/badge/-Linux/MacOS/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) -[![ALl versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) +[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) @@ -485,7 +486,7 @@ ## Download Options: -R, --retries RETRIES Number of retries (default is 10), or "infinite" --file-access-retries RETRIES Number of times to retry on file access - error (default is 10), or "infinite" + error (default is 3), or "infinite" --fragment-retries RETRIES Number of retries for a fragment (default is 10), or "infinite" (DASH, hlsnative and ISM) @@ -925,8 +926,8 @@ ## Post-Processing Options: same codecs and number of streams to be concatable. The "pl_video:" prefix can be used with "--paths" and "--output" to set - the output filename for the split files. - See "OUTPUT TEMPLATE" for details + the output filename for the concatenated + files. See "OUTPUT TEMPLATE" for details --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the @@ -1063,8 +1064,9 @@ # CONFIGURATION * `%APPDATA%/yt-dlp/config.txt` * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` - + `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to `C:\Users\\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\`), or `%HOMEDRIVE%%HOMEPATH%` + 1. **System Configuration**: `/etc/yt-dlp.conf` For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: @@ -1121,6 +1123,7 @@ # OUTPUT TEMPLATE It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: + 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` @@ -1601,7 +1604,9 @@ # MODIFYING METADATA Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`. This option also has a few special uses: + * You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?Phttps?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description + * You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values. **Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes. @@ -1743,19 +1748,72 @@ # EMBEDDING YT-DLP ```python from yt_dlp import YoutubeDL -ydl_opts = {'format': 'bestaudio'} -with YoutubeDL(ydl_opts) as ydl: - ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] +with YoutubeDL() as ydl: + ydl.download(URLS) ``` Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L181). -Here's a more complete example demonstrating various functionality: +**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above + +## Embedding examples + +### Extracting information ```python import json import yt_dlp +URL = 'https://www.youtube.com/watch?v=BaW_jenozKc' + +# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions +ydl_opts = {} +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(URL, download=False) + + # ℹ️ ydl.sanitize_info makes the info json-serializable + print(json.dumps(ydl.sanitize_info(info))) +``` +### Download from info-json + +```python +import yt_dlp + +INFO_FILE = 'path/to/video.info.json' + +with yt_dlp.YoutubeDL() as ydl: + error_code = ydl.download_with_info_file(INFO_FILE) + +print('Some videos failed to download' if error_code + else 'All videos successfully downloaded') +``` + +### Extract audio + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +ydl_opts = { + 'format': 'm4a/bestaudio/best' + # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments + 'postprocessors': [{ # Extract audio using ffmpeg + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'm4a', + }] +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + error_code = ydl.download(URLS) +``` +### Adding logger and progress hook + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] class MyLogger: def debug(self, msg): @@ -1776,23 +1834,51 @@ # EMBEDDING YT-DLP print(msg) -# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor +# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL) +def my_hook(d): + if d['status'] == 'finished': + print('Done downloading, now post-processing ...') + + +ydl_opts = { + 'logger': MyLogger(), + 'progress_hooks': [my_hook], +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download(URLS) +``` + +### Add a custom PostProcessor + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +# ℹ️ See help(yt_dlp.postprocessor.PostProcessor) class MyCustomPP(yt_dlp.postprocessor.PostProcessor): - # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run def run(self, info): self.to_screen('Doing stuff') return [], info -# ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL -def my_hook(d): - if d['status'] == 'finished': - print('Done downloading, now converting ...') +with yt_dlp.YoutubeDL() as ydl: + ydl.add_post_processor(MyCustomPP()) + ydl.download(URLS) +``` +### Use a custom format selector + +```python +import yt_dlp + +URL = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + def format_selector(ctx): """ Select the best video and the best audio that won't result in an mkv. - This is just an example and does not handle all cases """ + NOTE: This is just an example and does not handle all cases """ # formats are already sorted worst to best formats = ctx.get('formats')[::-1] @@ -1807,8 +1893,8 @@ # ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL best_audio = next(f for f in formats if ( f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext)) + # These are the minimum required fields for a merged format yield { - # These are the minimum required fields for a merged format 'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}', 'ext': best_video['ext'], 'requested_formats': [best_video, best_audio], @@ -1817,36 +1903,14 @@ # ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL } -# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options ydl_opts = { 'format': format_selector, - 'postprocessors': [{ - # Embed metadata in video using ffmpeg. - # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts - 'key': 'FFmpegMetadata', - 'add_chapters': True, - 'add_metadata': True, - }], - 'logger': MyLogger(), - 'progress_hooks': [my_hook], - # Add custom headers - 'http_headers': {'Referer': 'https://www.google.com'} } - -# ℹ️ See the public functions in yt_dlp.YoutubeDL for for other available functions. -# Eg: "ydl.download", "ydl.download_with_info_file" with yt_dlp.YoutubeDL(ydl_opts) as ydl: - ydl.add_post_processor(MyCustomPP()) - info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') - - # ℹ️ ydl.sanitize_info makes the info json-serializable - print(json.dumps(ydl.sanitize_info(info))) + ydl.download(URLS) ``` -**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above - - # DEPRECATED OPTIONS @@ -1960,8 +2024,7 @@ #### Removed These options were deprecated since 2014 and have now been entirely removed -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s" - -t, --title -o "%(title)s-%(id)s.%(ext)s" - -l, --literal -o accepts literal names + -t, -l, --title, --literal -o "%(title)s-%(id)s.%(ext)s" # CONTRIBUTING See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f339e4cd1c..24991e19b4 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -397,7 +397,8 @@ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_u # Conflicting options report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) - report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl')) + report_conflict('--exec-before-download', 'exec_before_dl_cmd', + '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') @@ -412,7 +413,7 @@ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_u report_conflict('--embed-subs', 'embedsubtitles') report_conflict('--embed-thumbnail', 'embedthumbnail') report_conflict('--extract-audio', 'extractaudio') - report_conflict('--fixup', 'fixup', val1=(opts.fixup or '').lower() in ('', 'never', 'ignore'), default='never') + report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never') report_conflict('--recode-video', 'recodevideo') report_conflict('--remove-chapters', 'remove_chapters', default=[]) report_conflict('--remux-video', 'remuxvideo') diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 8ad1d9efd8..a5014d9317 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -105,6 +105,7 @@ def _real_extract(self, url): resp = self._parse_json(e.cause.read().decode(), video_id) if resp.get('code') == 'GeoBlocked': self.raise_geo_restricted() + raise fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url')) if not fmt_url: diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index ce6dec2f54..fdea3a7ea1 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -83,7 +83,8 @@ def deprecation_warning(self, text): write_string(f'DeprecationWarning: {text}') def report_error(self, text, *args, **kwargs): - # Exists only for compatibility. Do not use + self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. ' + 'raise "yt_dlp.utils.PostProcessingError" instead') if self._downloader: return self._downloader.report_error(text, *args, **kwargs)