From 75d43ca08004e711fb37694f7208af35615118d5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 18 Mar 2021 20:57:20 +0530 Subject: [PATCH] Option to keep private keys in the infojson Options: --clean-infojson, --no-clean-infojson Related: https://github.com/yt-dlp/yt-dlp/issues/42#issuecomment-800778391 --- README.md | 5 +++++ yt_dlp/YoutubeDL.py | 13 ++++++++----- yt_dlp/__init__.py | 1 + yt_dlp/options.py | 10 ++++++++++ yt_dlp/utils.py | 2 +- 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a38d42048f..257d88b97f 100644 --- a/README.md +++ b/README.md @@ -423,6 +423,11 @@ ## Filesystem Options: --write-description etc. (default) --no-write-playlist-metafiles Do not write playlist metadata when using --write-info-json, --write-description etc. + --clean-infojson Remove some private fields such as + filenames from the infojson. Note that it + could still contain some personal + information (default) + --no-clean-infojson Write all fields to the infojson --get-comments Retrieve video comments to be placed in the .info.json file. The comments are fetched even without this option if the extraction diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 7a5ad6d0e7..e79039b5ca 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -216,6 +216,7 @@ class YoutubeDL(object): logtostderr: Log messages to stderr instead of stdout. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file + clean_infojson: Remove private fields from the infojson writecomments: Extract video comments. This will not be written to disk unless writeinfojson is also given writeannotations: Write the video annotations to a .annotations.xml file @@ -1201,7 +1202,7 @@ def __process_playlist(self, ie_result, download): # playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn) try: - write_json_file(self.filter_requested_info(playlist_info), infofn) + write_json_file(self.filter_requested_info(playlist_info, self.params.get('clean_infojson', True)), infofn) except (OSError, IOError): self.report_error('Cannot write playlist metadata to JSON file ' + infofn) @@ -2046,7 +2047,7 @@ def print_optional(field): print_mandatory('format') if self.params.get('forcejson', False): self.post_extract(info_dict) - self.to_stdout(json.dumps(info_dict)) + self.to_stdout(json.dumps(info_dict, default=repr)) def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -2215,7 +2216,7 @@ def dl(name, info, subtitle=False): else: self.to_screen('[info] Writing video metadata as JSON to: ' + infofn) try: - write_json_file(self.filter_requested_info(info_dict), infofn) + write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn) except (OSError, IOError): self.report_error('Cannot write video metadata to JSON file ' + infofn) return @@ -2504,7 +2505,7 @@ def download(self, url_list): else: if self.params.get('dump_single_json', False): self.post_extract(res) - self.to_stdout(json.dumps(res)) + self.to_stdout(json.dumps(res, default=repr)) return self._download_retcode @@ -2526,7 +2527,9 @@ def download_with_info_file(self, info_filename): return self._download_retcode @staticmethod - def filter_requested_info(info_dict): + def filter_requested_info(info_dict, actually_filter=True): + if not actually_filter: + return info_dict exceptions = { 'remove': ['requested_formats', 'requested_subtitles', 'filepath', 'entries'], 'keep': ['_type'], diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 15a006d50b..c58362698e 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -491,6 +491,7 @@ def report_args_compat(arg, name): 'writeannotations': opts.writeannotations, 'writeinfojson': opts.writeinfojson, 'allow_playlist_files': opts.allow_playlist_files, + 'clean_infojson': opts.clean_infojson, 'getcomments': opts.getcomments, 'writethumbnail': opts.writethumbnail, 'write_all_thumbnails': opts.write_all_thumbnails, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index e7bf9d5642..5c1908bfed 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -985,6 +985,16 @@ def _dict_from_multiple_values_options_callback( help=( 'Do not write playlist metadata when using ' '--write-info-json, --write-description etc.')) + filesystem.add_option( + '--clean-infojson', + action='store_true', dest='clean_infojson', default=True, + help=( + 'Remove some private fields such as filenames from the infojson. ' + 'Note that it could still contain some personal information (default)')) + filesystem.add_option( + '--no-clean-infojson', + action='store_false', dest='clean_infojson', + help='Write all fields to the infojson') filesystem.add_option( '--get-comments', action='store_true', dest='getcomments', default=False, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a913b98149..27d6495658 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1836,7 +1836,7 @@ def write_json_file(obj, fn): try: with tf: - json.dump(obj, tf) + json.dump(obj, tf, default=repr) if sys.platform == 'win32': # Need to remove existing file on Windows, else os.rename raises # WindowsError or FileExistsError.