From e6f21b3d925ea708955c60c400a31fc2e0e36ac0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 17 Sep 2021 23:53:55 +0530 Subject: [PATCH] [docs,cleanup] Some minor refactoring and improve docs --- Changelog.md | 1 - README.md | 15 ++++++++------ devscripts/lazy_load_template.py | 12 +++++++++--- devscripts/make_lazy_extractors.py | 2 -- test/test_YoutubeDL.py | 2 ++ yt_dlp/YoutubeDL.py | 8 ++++---- yt_dlp/__init__.py | 1 + yt_dlp/cache.py | 2 ++ yt_dlp/compat.py | 2 ++ yt_dlp/extractor/common.py | 6 +++++- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/options.py | 4 ++-- yt_dlp/postprocessor/ffmpeg.py | 29 ++++++++++++++-------------- yt_dlp/postprocessor/sponsorblock.py | 8 ++++---- 14 files changed, 55 insertions(+), 39 deletions(-) diff --git a/Changelog.md b/Changelog.md index 35a1b2680d..7334f87c5e 100644 --- a/Changelog.md +++ b/Changelog.md @@ -41,7 +41,6 @@ ### 2021.09.25 * [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera) * [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera) * [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804) - * [9Now] handle episodes of series by [dalanmiller](https://github.com/dalanmiller) * [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon) * [Arte] Improve description extraction by [renalid](https://github.com/renalid) diff --git a/README.md b/README.md index bbcc183d8a..d219b28d3b 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ # NEW FEATURES * Most (but not all) age-gated content can be downloaded without cookies * Partial workaround for throttling issue * Redirect channel's home URL automatically to `/video` to preserve the old behaviour - * `255kbps` audio is extracted from youtube music if premium cookies are given + * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)) * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[:PROFILE]` @@ -150,7 +150,7 @@ # INSTALLATION yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS You can install yt-dlp using one of the following methods: -* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) (recommended method) +* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) * With Homebrew, `brew install yt-dlp/taps/yt-dlp` * Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp` * Use pip+git: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp.git@release` @@ -195,7 +195,7 @@ ### DEPENDENCIES While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting various data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) @@ -1002,9 +1002,10 @@ # OUTPUT TEMPLATE - `uploader` (string): Full name of the video uploader - `license` (string): License name the video is licensed under - `creator` (string): The creator of the video - - `release_date` (string): The date (YYYYMMDD) when the video was released - `timestamp` (numeric): UNIX timestamp of the moment the video became available - `upload_date` (string): Video upload date (YYYYMMDD) + - `release_date` (string): The date (YYYYMMDD) when the video was released + - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released - `uploader_id` (string): Nickname or id of the video uploader - `channel` (string): Full name of the channel the video is uploaded on - `channel_id` (string): Id of the channel @@ -1046,8 +1047,10 @@ # OUTPUT TEMPLATE - `extractor_key` (string): Key name of the extractor - `epoch` (numeric): Unix epoch when creating the file - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start` + - `n_entries` (numeric): Total number of extracted items in the playlist - `playlist` (string): Name or id of the playlist that contains the video - - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist + - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according the final index + - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist - `playlist_id` (string): Playlist identifier - `playlist_title` (string): Playlist title - `playlist_uploader` (string): Full name of the playlist uploader @@ -1266,7 +1269,7 @@ ## Sorting Formats All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB. -The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. +The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. Note that the default has `codec:vp9.2`; i.e. `av1` is not prefered diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 036e2e767c..da89e070de 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -1,9 +1,15 @@ # coding: utf-8 import re +from ..utils import bug_reports_message, write_string + class LazyLoadMetaClass(type): def __getattr__(cls, name): + if '_real_class' not in cls.__dict__: + write_string( + f'WARNING: Falling back to normal extractor since lazy extractor ' + f'{cls.__name__} does not have attribute {name}{bug_reports_message()}') return getattr(cls._get_real_class(), name) @@ -13,10 +19,10 @@ class LazyLoadExtractor(metaclass=LazyLoadMetaClass): @classmethod def _get_real_class(cls): - if '__real_class' not in cls.__dict__: + if '_real_class' not in cls.__dict__: mod = __import__(cls._module, fromlist=(cls.__name__,)) - cls.__real_class = getattr(mod, cls.__name__) - return cls.__real_class + cls._real_class = getattr(mod, cls.__name__) + return cls._real_class def __new__(cls, *args, **kwargs): real_cls = cls._get_real_class() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index e7b024490c..427045b984 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -7,8 +7,6 @@ from os.path import dirname as dirn import sys -print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) - sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) lazy_extractors_filename = sys.argv[1] diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e746589450..450f254933 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -653,6 +653,7 @@ def test_add_extra_info(self): 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, + 'playlist_autonumber': 2, '_last_playlist_index': 100, 'n_entries': 10, 'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}] @@ -690,6 +691,7 @@ def test(tmpl, expected, *, info=None, **params): test('%(duration_string)s', ('27:46:40', '27-46-40')) test('%(resolution)s', '1080p') test('%(playlist_index)s', '001') + test('%(playlist_autonumber)s', '02') test('%(autonumber)s', '00001') test('%(autonumber+2)03d', '005', autonumber_start=3) test('%(autonumber)s', '001', autonumber_size=3) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 873c22ad62..c42a29ee3f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -454,13 +454,12 @@ class YoutubeDL(object): _NUMERIC_FIELDS = set(( 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', - 'timestamp', 'upload_year', 'upload_month', 'upload_day', + 'timestamp', 'release_timestamp', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', 'track_number', 'disc_number', 'release_year', - 'playlist_index', )) params = None @@ -579,8 +578,8 @@ def check_deprecated(param, option, suggestion): self._setup_opener() - """Preload the archive, if any is specified""" def preload_download_archive(fn): + """Preload the archive, if any is specified""" if fn is None: return False self.write_debug('Loading archive file %r\n' % fn) @@ -934,10 +933,11 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) - # For fields playlist_index and autonumber convert all occurrences + # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')), + 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')), 'autonumber': self.params.get('autonumber_size') or 5, } diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 2ae08f154e..38e1d0ec65 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -513,6 +513,7 @@ def report_conflict(arg1, arg2): 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, }) + # Note: Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index dde9cca646..e5cb193bce 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -50,6 +50,7 @@ def store(self, section, key, data, dtype='json'): except OSError as ose: if ose.errno != errno.EEXIST: raise + self._ydl.write_debug(f'Saving {section}.{key} to cache') write_json_file(data, fn) except Exception: tb = traceback.format_exc() @@ -66,6 +67,7 @@ def load(self, section, key, dtype='json', default=None): try: try: with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + self._ydl.write_debug(f'Loading {section}.{key} from cache') return json.load(cachef) except ValueError: try: diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 7b55b7d9d4..9bf05c7373 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -33,6 +33,8 @@ class compat_HTMLParseError(Exception): pass +# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE +# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines def compat_ctypes_WINFUNCTYPE(*args, **kwargs): return ctypes.WINFUNCTYPE(*args, **kwargs) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 114b1faaf2..5da29dc63d 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -406,6 +406,10 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. + Subclasses may also override suitable() if necessary, but ensure the function + signature is preserved and that this function imports everything it needs + (except other extractors), so that lazy_extractors works correctly + _GEO_BYPASS attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. Though it won't disable explicit geo restriction bypass based on @@ -421,7 +425,7 @@ class InfoExtractor(object): will be used by geo restriction bypass mechanism similarly to _GEO_COUNTRIES. - Finally, the _WORKING attribute should be set to False for broken IEs + The _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 272bdb0597..159b0a3b9d 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -621,7 +621,7 @@ def _extract_account_syncid(*args): return delegated_sid sync_ids = (try_get( data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], - lambda x: x['DATASYNC_ID']), compat_str) or '').split("||") + lambda x: x['DATASYNC_ID']), compat_str) or '').split('||') if len(sync_ids) >= 2 and sync_ids[1]: # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel # and just "user_syncid||" for primary channel. We only want the channel_syncid diff --git a/yt_dlp/options.py b/yt_dlp/options.py index daf4c0041c..be43f37ee1 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -261,7 +261,7 @@ def _dict_from_options_callback( general.add_option( '--mark-watched', action='store_true', dest='mark_watched', default=False, - help='Mark videos watched (YouTube only)') + help='Mark videos watched (even with --simulate). Currently only supported for YouTube') general.add_option( '--no-mark-watched', action='store_false', dest='mark_watched', @@ -768,7 +768,7 @@ def _dict_from_options_callback( dest='encoding', metavar='ENCODING', help='Force the specified encoding (experimental)') workarounds.add_option( - '--no-check-certificate', + '--no-check-certificates', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation') workarounds.add_option( diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 058926929f..311170920c 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -478,7 +478,7 @@ def run(self, information): class FFmpegVideoConvertorPP(FFmpegPostProcessor): SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus') FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS))) - _action = 'converting' + _ACTION = 'converting' def __init__(self, downloader=None, preferedformat=None): super(FFmpegVideoConvertorPP, self).__init__(downloader) @@ -497,29 +497,28 @@ def _options(target_ext): return [] @PostProcessor._restrict_to(images=False) - def run(self, information): - path, source_ext = information['filepath'], information['ext'].lower() + def run(self, info): + filename, source_ext = info['filepath'], info['ext'].lower() target_ext = self._target_ext(source_ext) _skip_msg = ( - 'could not find a mapping for %s' if not target_ext - else 'already is in target format %s' if source_ext == target_ext + f'could not find a mapping for {source_ext}' if not target_ext + else f'already is in target format {source_ext}' if source_ext == target_ext else None) if _skip_msg: - self.to_screen('Not %s media file "%s"; %s' % (self._action, path, _skip_msg % source_ext)) - return [], information + self.to_screen(f'Not {self._ACTION} media file {filename!r}; {_skip_msg}') + return [], info - prefix, sep, oldext = path.rpartition('.') - outpath = prefix + sep + target_ext - self.to_screen('%s video from %s to %s; Destination: %s' % (self._action.title(), source_ext, target_ext, outpath)) - self.run_ffmpeg(path, outpath, self._options(target_ext)) + outpath = replace_extension(filename, target_ext, source_ext) + self.to_screen(f'{self._ACTION.title()} video from {source_ext} to {target_ext}; Destination: {outpath}') + self.run_ffmpeg(filename, outpath, self._options(target_ext)) - information['filepath'] = outpath - information['format'] = information['ext'] = target_ext - return [path], information + info['filepath'] = outpath + info['format'] = info['ext'] = target_ext + return [filename], info class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): - _action = 'remuxing' + _ACTION = 'remuxing' @staticmethod def _options(target_ext): diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 6264d45c5d..7265a9de7c 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -4,7 +4,7 @@ from .ffmpeg import FFmpegPostProcessor from ..compat import compat_urllib_parse_urlencode, compat_HTTPError -from ..utils import PostProcessingError, sanitized_Request +from ..utils import PostProcessingError, network_exceptions, sanitized_Request class SponsorBlockPP(FFmpegPostProcessor): @@ -88,9 +88,9 @@ def _get_json(self, url): self.write_debug(f'SponsorBlock query: {url}') try: rsp = self._downloader.urlopen(sanitized_Request(url)) - except compat_HTTPError as e: - if e.code == 404: + except network_exceptions as e: + if isinstance(e, compat_HTTPError) and e.code == 404: return [] - raise PostProcessingError(f'Error communicating with SponsorBlock API - {e}') + raise PostProcessingError(f'Unable to communicate with SponsorBlock API - {e}') return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))