From fc08bdd6ab2abb92d7814d035b34c15cb7006597 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 24 Jan 2022 21:01:17 +0530 Subject: [PATCH] [extractor] Allow non-fatal `title` extraction --- CONTRIBUTING.md | 4 +++- yt_dlp/YoutubeDL.py | 7 ++++++- yt_dlp/extractor/common.py | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed4bf69d92..c25d6a2a57 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -252,7 +252,9 @@ ### Mandatory and optional metafields - `title` (media title) - `url` (media download URL) or `formats` -The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While, in fact, only `id` is technically mandatory, due to compatibility reasons, yt-dlp also treats `title` as mandatory. The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet. +The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow it's extraction to be non-fatal. + +The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet. [Any field](yt_dlp/extractor/common.py#219-L426) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4af77cae29..24843c7755 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2299,10 +2299,15 @@ def process_video_result(self, info_dict, download=True): self._num_videos += 1 if 'id' not in info_dict: - raise ExtractorError('Missing "id" field in extractor result') + raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor']) + elif not info_dict.get('id'): + raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor']) if 'title' not in info_dict: raise ExtractorError('Missing "title" field in extractor result', video_id=info_dict['id'], ie=info_dict['extractor']) + elif not info_dict.get('title'): + self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') + info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' def report_force_conversion(field, field_not, conversion): self.report_warning( diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index bd9362827d..a2f160a821 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1291,7 +1291,7 @@ def _og_search_description(self, html, **kargs): return self._og_search_property('description', html, fatal=False, **kargs) def _og_search_title(self, html, **kargs): - return self._og_search_property('title', html, **kargs) + return self._og_search_property('title', html, fatal=False, **kargs) def _og_search_video_url(self, html, name='video url', secure=True, **kargs): regexes = self._og_regexes('video') + self._og_regexes('video:url')