From d7009caa03b48360541e0770a9231ba71f429308 Mon Sep 17 00:00:00 2001
From: shirt-dev <2660574+shirt-dev@users.noreply.github.com>
Date: Wed, 10 Mar 2021 09:39:40 -0500
Subject: [PATCH] Improve HLS/DASH external downloader code (#162)

Authored by: shirt
---
 yt_dlp/YoutubeDL.py           |  4 +---
 yt_dlp/downloader/dash.py     |  8 +++++---
 yt_dlp/downloader/external.py | 23 ++++++++++-------------
 yt_dlp/downloader/hls.py      | 31 ++++++++++++++++++-------------
 4 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index e58f7a32f..5e3c015ba 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2437,9 +2437,7 @@ def correct_ext(filename):
                     else:
                         assert fixup_policy in ('ignore', 'never')
 
-                if (info_dict.get('protocol') == 'm3u8_native'
-                        or info_dict.get('protocol') == 'm3u8'
-                        and self.params.get('hls_prefer_native')):
+                if get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD':
                     if fixup_policy == 'warn':
                         self.report_warning('%s: malformed AAC bitstream detected.' % (
                             info_dict['id']))
diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py
index d758282c1..99acc8db2 100644
--- a/yt_dlp/downloader/dash.py
+++ b/yt_dlp/downloader/dash.py
@@ -37,7 +37,7 @@ def real_download(self, filename, info_dict):
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        fragment_urls = []
+        fragments_to_download = []  # separate name: the loop below iterates `fragments`, so it must not be rebound
         frag_index = 0
         for i, fragment in enumerate(fragments):
             frag_index += 1
@@ -49,7 +49,9 @@ def real_download(self, filename, info_dict):
                 fragment_url = urljoin(fragment_base_url, fragment['path'])
 
             if real_downloader:
-                fragment_urls.append(fragment_url)
+                fragments_to_download.append({
+                    'url': fragment_url,
+                })
                 continue
 
             # In DASH, the first segment contains necessary headers to
@@ -90,7 +92,7 @@ def real_download(self, filename, info_dict):
 
         if real_downloader:
             info_copy = info_dict.copy()
-            info_copy['url_list'] = fragment_urls
+            info_copy['fragments'] = fragments_to_download
             fd = real_downloader(self.ydl, self.params)
             # TODO: Make progress updates work without hooking twice
             # for ph in self._progress_hooks:
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 440603ea3..026a4e382 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -122,18 +122,14 @@ def _call_downloader(self, tmpfilename, info_dict):
         if p.returncode != 0:
             self.to_stderr(stderr.decode('utf-8', 'replace'))
 
-        if 'url_list' in info_dict:
+        if 'fragments' in info_dict:
             file_list = []
-            for [i, url] in enumerate(info_dict['url_list']):
-                tmpsegmentname = '%s_%s.frag' % (tmpfilename, i)
-                file_list.append(tmpsegmentname)
-            key_list = info_dict.get('key_list')
-            decrypt_info = None
             dest, _ = sanitize_open(tmpfilename, 'wb')
-            for i, file in enumerate(file_list):
+            for [i, fragment] in enumerate(info_dict['fragments']):
+                file = '%s_%s.frag' % (tmpfilename, i)
+                decrypt_info = fragment.get('decrypt_info')
                 src, _ = sanitize_open(file, 'rb')
-                if key_list:
-                    decrypt_info = next((x for x in key_list if x['INDEX'] == i), decrypt_info)
+                if decrypt_info:
                     if decrypt_info['METHOD'] == 'AES-128':
                         iv = decrypt_info.get('IV')
                         decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
@@ -149,6 +145,7 @@ def _call_downloader(self, tmpfilename, info_dict):
                     fragment_data = src.read()
                     dest.write(fragment_data)
                 src.close()
+                file_list.append(file)
             dest.close()
             if not self.params.get('keep_fragments', False):
                 for file_path in file_list:
@@ -248,7 +245,7 @@ class Aria2cFD(ExternalFD):
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-c']
         dn = os.path.dirname(tmpfilename)
-        if 'url_list' not in info_dict:
+        if 'fragments' not in info_dict:
             cmd += ['--out', os.path.basename(tmpfilename)]
         verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
         cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
@@ -262,14 +259,14 @@ def _make_cmd(self, tmpfilename, info_dict):
         cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
         cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
         cmd += ['--auto-file-renaming=false']
-        if 'url_list' in info_dict:
+        if 'fragments' in info_dict:
             cmd += verbose_level_args
             cmd += ['--uri-selector', 'inorder', '--download-result=hide']
             url_list_file = '%s.frag.urls' % tmpfilename
             url_list = []
-            for [i, url] in enumerate(info_dict['url_list']):
+            for [i, fragment] in enumerate(info_dict['fragments']):
                 tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
-                url_list.append('%s\n\tout=%s' % (url, tmpsegmentname))
+                url_list.append('%s\n\tout=%s' % (fragment['url'], tmpsegmentname))
             stream, _ = sanitize_open(url_list_file, 'wb')
             stream.write('\n'.join(url_list).encode('utf-8'))
             stream.close()
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 6f30842a7..29be6bdf9 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -29,7 +29,7 @@ class HlsFD(FragmentFD):
     FD_NAME = 'hlsnative'
 
     @staticmethod
-    def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
+    def can_download(manifest, info_dict, allow_unplayable_formats=False, real_downloader=None, with_crypto=can_decrypt_frag):
         UNSUPPORTED_FEATURES = [
             # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
 
@@ -53,6 +53,10 @@ def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypt
             UNSUPPORTED_FEATURES += [
                 r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
             ]
+        if real_downloader:
+            UNSUPPORTED_FEATURES += [
+                r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
+            ]
         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
         is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
         check_results.append(with_crypto or not is_aes128_enc)
@@ -68,7 +72,9 @@ def real_download(self, filename, info_dict):
         man_url = urlh.geturl()
         s = urlh.read().decode('utf-8', 'ignore')
 
-        if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
+        real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
+
+        if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats'), real_downloader):
             if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
                 self.report_error('pycryptodome not found. Please install it.')
                 return False
@@ -83,8 +89,6 @@ def real_download(self, filename, info_dict):
             #     fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)
 
-        real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
-
         def is_ad_fragment_start(s):
             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                     or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
@@ -93,7 +97,7 @@ def is_ad_fragment_end(s):
             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                     or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
 
-        fragment_urls = []
+        fragments = []
 
         media_frags = 0
         ad_frags = 0
@@ -136,7 +140,6 @@ def is_ad_fragment_end(s):
         i = 0
         media_sequence = 0
         decrypt_info = {'METHOD': 'NONE'}
-        key_list = []
         byte_range = {}
         discontinuity_count = 0
         frag_index = 0
@@ -161,7 +164,10 @@ def is_ad_fragment_end(s):
                         frag_url = update_url_query(frag_url, extra_query)
 
                     if real_downloader:
-                        fragment_urls.append(frag_url)
+                        fragments.append({
+                            'url': frag_url,
+                            'decrypt_info': decrypt_info,
+                        })
                         continue
                     download_frag = True
 
@@ -181,7 +187,10 @@ def is_ad_fragment_end(s):
                     if extra_query:
                         frag_url = update_url_query(frag_url, extra_query)
                     if real_downloader:
-                        fragment_urls.append(frag_url)
+                        fragments.append({
+                            'url': frag_url,
+                            'decrypt_info': decrypt_info,
+                        })
                         continue
 
                     if map_info.get('BYTERANGE'):
@@ -206,9 +215,6 @@ def is_ad_fragment_end(s):
                             decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                         if decrypt_url != decrypt_info['URI']:
                             decrypt_info['KEY'] = None
-                    key_data = decrypt_info.copy()
-                    key_data['INDEX'] = frag_index
-                    key_list.append(key_data)
 
                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                     media_sequence = int(line[22:])
@@ -275,8 +281,7 @@ def is_ad_fragment_end(s):
 
         if real_downloader:
             info_copy = info_dict.copy()
-            info_copy['url_list'] = fragment_urls
-            info_copy['key_list'] = key_list
+            info_copy['fragments'] = fragments
             fd = real_downloader(self.ydl, self.params)
             # TODO: Make progress updates work without hooking twice
             # for ph in self._progress_hooks: