[fragment] Read downloaded fragments only when needed (#3069)

Authored by: Lesmiscore
This commit is contained in:
Lesmiscore (Naoya Ozaki) 2022-03-15 12:27:41 +09:00 committed by GitHub
parent d69e55c1d8
commit d71fd41249
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 30 deletions

View File

@ -133,19 +133,19 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat
} }
success = ctx['dl'].download(fragment_filename, fragment_info_dict) success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success: if not success:
return False, None return False
if fragment_info_dict.get('filetime'): if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
ctx['fragment_filename_sanitized'] = fragment_filename ctx['fragment_filename_sanitized'] = fragment_filename
try: return True
return True, self._read_fragment(ctx)
except FileNotFoundError:
if not info_dict.get('is_live'):
raise
return False, None
def _read_fragment(self, ctx): def _read_fragment(self, ctx):
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') try:
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
except FileNotFoundError:
if ctx.get('live'):
return None
raise
ctx['fragment_filename_sanitized'] = frag_sanitized ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read() frag_content = down.read()
down.close() down.close()
@ -457,7 +457,7 @@ def download_and_append_fragments(
def download_fragment(fragment, ctx): def download_fragment(fragment, ctx):
if not interrupt_trigger[0]: if not interrupt_trigger[0]:
return False, fragment['frag_index'] return
frag_index = ctx['fragment_index'] = fragment['frag_index'] frag_index = ctx['fragment_index'] = fragment['frag_index']
ctx['last_error'] = None ctx['last_error'] = None
@ -467,14 +467,12 @@ def download_fragment(fragment, ctx):
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
# Never skip the first fragment # Never skip the first fragment
fatal = is_fatal(fragment.get('index') or (frag_index - 1)) fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
count, frag_content = 0, None
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) if self._download_fragment(ctx, fragment['url'], info_dict, headers):
if not success: break
return False, frag_index return
break
except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
# Unavailable (possibly temporary) fragments may be served. # Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort. # First we try to retry then either skip or abort.
@ -491,13 +489,9 @@ def download_fragment(fragment, ctx):
break break
raise raise
if count > fragment_retries: if count > fragment_retries and fatal:
if not fatal:
return False, frag_index
ctx['dest_stream'].close() ctx['dest_stream'].close()
self.report_error('Giving up after %s fragment retries' % fragment_retries) self.report_error('Giving up after %s fragment retries' % fragment_retries)
return False, frag_index
return frag_content, frag_index
def append_fragment(frag_content, frag_index, ctx): def append_fragment(frag_content, frag_index, ctx):
if not frag_content: if not frag_content:
@ -520,23 +514,23 @@ def append_fragment(frag_content, frag_index, ctx):
def _download_fragment(fragment): def _download_fragment(fragment):
ctx_copy = ctx.copy() ctx_copy = ctx.copy()
frag_content, frag_index = download_fragment(fragment, ctx_copy) download_fragment(fragment, ctx_copy)
return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
ctx['fragment_filename_sanitized'] = frag_filename ctx['fragment_filename_sanitized'] = frag_filename
ctx['fragment_index'] = frag_index ctx['fragment_index'] = frag_index
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
if not result: if not result:
return False return False
else: else:
for fragment in fragments: for fragment in fragments:
if not interrupt_trigger[0]: if not interrupt_trigger[0]:
break break
frag_content, frag_index = download_fragment(fragment, ctx) download_fragment(fragment, ctx)
result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
if not result: if not result:
return False return False

View File

@ -263,9 +263,11 @@ def real_download(self, filename, info_dict):
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) success = self._download_fragment(ctx, segment['url'], info_dict)
if not success: if not success:
return False return False
frag_content = self._read_fragment(ctx)
if not extra_state['ism_track_written']: if not extra_state['ism_track_written']:
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]

View File

@ -171,9 +171,10 @@ def real_download(self, filename, info_dict):
assert fragment_base_url assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path']) fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) success = self._download_fragment(ctx, fragment_url, info_dict)
if not success: if not success:
continue continue
frag_content = self._read_fragment(ctx)
mime_type = b'image/jpeg' mime_type = b'image/jpeg'
if frag_content.startswith(b'\x89PNG\r\n\x1a\n'): if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):

View File

@ -115,9 +115,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success, raw_fragment = dl_fragment(url, request_data, headers) success = dl_fragment(url, request_data, headers)
if not success: if not success:
return False, None, None, None return False, None, None, None
raw_fragment = self._read_fragment(ctx)
try: try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError: except RegexNotFoundError:
@ -145,9 +146,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
self._prepare_and_start_frag_download(ctx, info_dict) self._prepare_and_start_frag_download(ctx, info_dict)
success, raw_fragment = dl_fragment(info_dict['url']) success = dl_fragment(info_dict['url'])
if not success: if not success:
return False return False
raw_fragment = self._read_fragment(ctx)
try: try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError: except RegexNotFoundError: