From 1732eccc0a40256e076bf0435a29f0f1d8419280 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Sun, 26 Nov 2023 03:12:05 +0100 Subject: [PATCH] [core] Parse `release_year` from `release_date` (#8524) Closes #7263 Authored by: seproDev --- README.md | 2 +- test/helper.py | 6 +++++- yt_dlp/YoutubeDL.py | 3 +++ yt_dlp/extractor/archiveorg.py | 2 -- yt_dlp/extractor/common.py | 4 +++- yt_dlp/extractor/harpodeon.py | 10 +++++----- yt_dlp/extractor/monstercat.py | 2 -- yt_dlp/extractor/youtube.py | 1 - 8 files changed, 17 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 6fe7fab6a2..f67cab572d 100644 --- a/README.md +++ b/README.md @@ -1309,6 +1309,7 @@ # OUTPUT TEMPLATE - `upload_date` (string): Video upload date in UTC (YYYYMMDD) - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released - `release_date` (string): The date (YYYYMMDD) when the video was released in UTC + - `release_year` (numeric): Year (YYYY) when the video or album was released - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified - `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC - `uploader_id` (string): Nickname or id of the video uploader @@ -1382,7 +1383,6 @@ # OUTPUT TEMPLATE - `album_type` (string): Type of the album - `album_artist` (string): List of all artists appeared on the album - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to - - `release_year` (numeric): Year (YYYY) when the album was released Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters: diff --git a/test/helper.py b/test/helper.py index 7503840be8..e5ace8fe2c 100644 --- a/test/helper.py +++ b/test/helper.py @@ -10,7 +10,7 @@ import yt_dlp.extractor from yt_dlp import YoutubeDL from yt_dlp.compat import compat_os_name -from yt_dlp.utils import preferredencoding, write_string +from yt_dlp.utils import preferredencoding, try_call, write_string if 'pytest' in sys.modules: import pytest @@ -223,6 +223,10 @@ def sanitize(key, value): if test_info_dict.get('display_id') == test_info_dict.get('id'): test_info_dict.pop('display_id') + # release_year may be generated from release_date + if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])): + test_info_dict.pop('release_year') + # Check url for flat entries if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'): test_info_dict['url'] = got_dict['url'] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 32ae25aa01..e65bef862c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2589,6 +2589,9 @@ def _fill_common_fields(self, info_dict, final=True): upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc) info_dict[date_key] = upload_date.strftime('%Y%m%d') + if not info_dict.get('release_year'): + info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])})) + live_keys = ('is_live', 'was_live') live_status = info_dict.get('live_status') if live_status is None: diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index a0b26ac5a0..3bb6f2e311 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -52,7 +52,6 @@ class ArchiveOrgIE(InfoExtractor): 'creator': 'SRI International', 'uploader': 'laura@archive.org', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', - 'release_year': 1968, 'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr', 'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect', @@ -134,7 +133,6 @@ class ArchiveOrgIE(InfoExtractor): 'album': '1977-05-08 - Barton Hall - Cornell University', 'release_date': '19770508', 'display_id': 'gd1977-05-08d01t07.flac', - 'release_year': 1977, 'track_number': 7, }, }, { diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 507ef34cf5..b179f40382 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -286,6 +286,9 @@ class InfoExtractor: If it is not clear whether to use timestamp or this, use the former release_date: The date (YYYYMMDD) when the video was released in UTC. If not explicitly set, calculated from release_timestamp + release_year: Year (YYYY) as integer when the video or album was released. + To be used if no exact release date is known. + If not explicitly set, calculated from release_date. modified_timestamp: UNIX timestamp of the moment the video was last modified. modified_date: The date (YYYYMMDD) when the video was last modified in UTC. If not explicitly set, calculated from modified_timestamp @@ -427,7 +430,6 @@ class InfoExtractor: and compilations). disc_number: Number of the disc or other physical medium the track belongs to, as an integer. - release_year: Year (YYYY) when the album was released. composer: Composer of the piece The following fields should only be set for clips that should be cut from the original video: diff --git a/yt_dlp/extractor/harpodeon.py b/yt_dlp/extractor/harpodeon.py index 0aa47337ff..46eaddb32f 100644 --- a/yt_dlp/extractor/harpodeon.py +++ b/yt_dlp/extractor/harpodeon.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import int_or_none class HarpodeonIE(InfoExtractor): @@ -14,7 +14,7 @@ class HarpodeonIE(InfoExtractor): 'title': 'The Smoking Out of Bella Butts', 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77', 'creator': 'Vitagraph Company of America', - 'release_date': '19150101' + 'release_year': 1915, } }, { 'url': 'https://www.harpodeon.com/preview/The_Smoking_Out_of_Bella_Butts/268068288', @@ -25,7 +25,7 @@ class HarpodeonIE(InfoExtractor): 'title': 'The Smoking Out of Bella Butts', 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77', 'creator': 'Vitagraph Company of America', - 'release_date': '19150101' + 'release_year': 1915, } }, { 'url': 'https://www.harpodeon.com/preview/Behind_the_Screen/421838710', @@ -36,7 +36,7 @@ class HarpodeonIE(InfoExtractor): 'title': 'Behind the Screen', 'description': 'md5:008972a3dc51fba3965ee517d2ba9155', 'creator': 'Lone Star Corporation', - 'release_date': '19160101' + 'release_year': 1916, } }] @@ -66,5 +66,5 @@ def _real_extract(self, url): 'http_headers': {'Referer': url}, 'description': self._html_search_meta('description', webpage, fatal=False), 'creator': creator, - 'release_date': unified_strdate(f'{release_year}0101') + 'release_year': int_or_none(release_year), } diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py index 7f04825fcd..cf5e099691 100644 --- a/yt_dlp/extractor/monstercat.py +++ b/yt_dlp/extractor/monstercat.py @@ -24,7 +24,6 @@ class MonstercatIE(InfoExtractor): 'title': 'The Secret Language of Trees', 'id': '742779548009', 'thumbnail': 'https://www.monstercat.com/release/742779548009/cover', - 'release_year': 2023, 'release_date': '20230711', 'album': 'The Secret Language of Trees', 'album_artist': 'BT', @@ -71,7 +70,6 @@ def _real_extract(self, url): 'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover', 'album_artist': try_call( lambda: get_element_by_class('h-normal text-uppercase mb-desktop-medium mb-smallish', html)), - 'release_year': int_or_none(date[:4]) if date else None, 'release_date': date, } diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f6caf09708..73fe233eff 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2072,7 +2072,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'track': 'Voyeur Girl', 'album': 'it\'s too much love to know my dear', 'release_date': '20190313', - 'release_year': 2019, 'alt_title': 'Voyeur Girl', 'view_count': int, 'playable_in_embed': True,