From d71fd412495af9ebccef807379859a0baa97ddee Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Tue, 15 Mar 2022 12:27:41 +0900 Subject: [PATCH] [fragment] Read downloaded fragments only when needed (#3069) Authored by: Lesmiscore --- yt_dlp/downloader/fragment.py | 46 +++++++++++--------------- yt_dlp/downloader/ism.py | 4 ++- yt_dlp/downloader/mhtml.py | 3 +- yt_dlp/downloader/youtube_live_chat.py | 6 ++-- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 95fb2f9e79..6b75dfc622 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -133,19 +133,19 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat } success = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: - return False, None + return False if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename - try: - return True, self._read_fragment(ctx) - except FileNotFoundError: - if not info_dict.get('is_live'): - raise - return False, None + return True def _read_fragment(self, ctx): - down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + try: + down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + except FileNotFoundError: + if ctx.get('live'): + return None + raise ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() @@ -457,7 +457,7 @@ def download_and_append_fragments( def download_fragment(fragment, ctx): if not interrupt_trigger[0]: - return False, fragment['frag_index'] + return frag_index = ctx['fragment_index'] = fragment['frag_index'] ctx['last_error'] = None @@ -467,14 +467,12 @@ def download_fragment(fragment, ctx): headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) # Never skip the first fragment - fatal = is_fatal(fragment.get('index') or (frag_index - 1)) - count, frag_content = 0, None + fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) - if not success: - return False, frag_index - break + if self._download_fragment(ctx, fragment['url'], info_dict, headers): + break + return except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. @@ -491,13 +489,9 @@ def download_fragment(fragment, ctx): break raise - if count > fragment_retries: - if not fatal: - return False, frag_index + if count > fragment_retries and fatal: ctx['dest_stream'].close() self.report_error('Giving up after %s fragment retries' % fragment_retries) - return False, frag_index - return frag_content, frag_index def append_fragment(frag_content, frag_index, ctx): if not frag_content: @@ -520,23 +514,23 @@ def append_fragment(frag_content, frag_index, ctx): def _download_fragment(fragment): ctx_copy = ctx.copy() - frag_content, frag_index = download_fragment(fragment, ctx_copy) - return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + download_fragment(fragment, ctx_copy) + return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: - for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): + for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): ctx['fragment_filename_sanitized'] = frag_filename ctx['fragment_index'] = frag_index - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx) if not result: return False else: for fragment in fragments: if not interrupt_trigger[0]: break - frag_content, frag_index = download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + download_fragment(fragment, ctx) + result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) if not result: return False diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 09516abe56..4d5618c836 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -263,9 +263,11 @@ def real_download(self, filename, info_dict): count = 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) + success = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False + frag_content = self._read_fragment(ctx) + if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index bc86fd1bf7..54e7117925 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -171,9 +171,10 @@ def real_download(self, filename, info_dict): assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) - success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + success = self._download_fragment(ctx, fragment_url, info_dict) if not success: continue + frag_content = self._read_fragment(ctx) mime_type = b'image/jpeg' if frag_content.startswith(b'\x89PNG\r\n\x1a\n'): diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index b28d1ec175..cfca686ee4 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -115,9 +115,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None count = 0 while count <= fragment_retries: try: - success, raw_fragment = dl_fragment(url, request_data, headers) + success = dl_fragment(url, request_data, headers) if not success: return False, None, None, None + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: @@ -145,9 +146,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None self._prepare_and_start_frag_download(ctx, info_dict) - success, raw_fragment = dl_fragment(info_dict['url']) + success = dl_fragment(info_dict['url']) if not success: return False + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: