[fd/fragment] Improve progress calculation (#8241)

This uses the download speed from all threads and also adds smoothing to speed and eta. Authored by: Grub4K
2024-11-23 11:31:29 +01:00 · 2023-10-08 02:01:01 +02:00 · 2023-10-08 02:01:01 +02:00 · 1c51c520f7
commit 1c51c520f7
parent 9d7ded6419
2 changed files with 128 additions and 29 deletions
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@ -14,6 +14,7 @@
 from ..networking.exceptions import HTTPError, IncompleteRead
 from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
 from ..utils.networking import HTTPHeaderDict
 from ..utils.progress import ProgressCalculator
 class HttpQuietDownloader(HttpFD):
@ -226,8 +227,7 @@ def _start_frag_download(self, ctx, info_dict):
        resume_len = ctx['complete_frags_downloaded_bytes']
        total_frags = ctx['total_frags']
        ctx_id = ctx.get('ctx_id')
-        # This dict stores the download progress, it's updated by the progress
+        # Stores the download progress, updated by the progress hook
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': resume_len,
@ -237,14 +237,8 @@ def _start_frag_download(self, ctx, info_dict):
            'tmpfilename': ctx['tmpfilename'],
        }
-        start = time.time()
+        ctx['started'] = time.time()
-        ctx.update({
+        progress = ProgressCalculator(resume_len)
            'started': start,
            'fragment_started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
        })
        def frag_progress_hook(s):
            if s['status'] not in ('downloading', 'finished'):
@ -259,38 +253,35 @@ def frag_progress_hook(s):
            state['max_progress'] = ctx.get('max_progress')
            state['progress_idx'] = ctx.get('progress_idx')
-            time_now = time.time()
+            state['elapsed'] = progress.elapsed
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
            s['fragment_info_dict'] = s.pop('info_dict', {})
            # XXX: Fragment resume is not accounted for here
            if not ctx['live']:
                estimated_size = (
                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
                    / (state['fragment_index'] + 1) * total_frags)
-                state['total_bytes_estimate'] = estimated_size
+                progress.total = estimated_size
                progress.update(s.get('downloaded_bytes'))
                state['total_bytes_estimate'] = progress.total
            else:
                progress.update(s.get('downloaded_bytes'))
            if s['status'] == 'finished':
                state['fragment_index'] += 1
                ctx['fragment_index'] = state['fragment_index']
-                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
+                progress.thread_reset()
-                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+
-                ctx['speed'] = state['speed'] = self.calc_speed(
+            state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
-                    ctx['fragment_started'], time_now, frag_total_bytes)
+            state['speed'] = ctx['speed'] = progress.speed.smooth
-                ctx['fragment_started'] = time.time()
+            state['eta'] = progress.eta.smooth
-                ctx['prev_frag_downloaded_bytes'] = 0
+
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['speed'] = state['speed'] = self.calc_speed(
                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
                if not ctx['live']:
                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state, info_dict)
        ctx['dl'].add_progress_hook(frag_progress_hook)
-        return start
+        return ctx['started']
    def _finish_frag_download(self, ctx, info_dict):
        ctx['dest_stream'].close()
@ -500,7 +491,6 @@ def _download_fragment(fragment):
                download_fragment(fragment, ctx_copy)
                return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
            self.report_warning('The download speed shown is only of one thread. This is a known issue')
            with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
                try:
                    for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
--- a/yt_dlp/utils/progress.py
+++ b/yt_dlp/utils/progress.py
@ -0,0 +1,109 @@
 from __future__ import annotations
 import bisect
 import threading
 import time
 class ProgressCalculator:
    """Aggregate download progress reported by one or more threads.

    Every thread passes the cumulative size of whatever it is currently
    downloading to `update`; only the difference to that thread's previous
    report is added to the shared `downloaded` counter. Speed is computed
    over a sliding window of recorded samples and, together with the ETA,
    is exposed through a `SmoothValue`.
    """

    # Time to calculate the speed over (seconds)
    SAMPLING_WINDOW = 3
    # Minimum time between two recorded speed samples (seconds)
    SAMPLING_RATE = 0.05
    # Time before showing eta (seconds)
    GRACE_PERIOD = 1

    def __init__(self, initial: int):
        """Start tracking, counting `initial` bytes (0 if falsy) as already downloaded."""
        now = time.monotonic()
        starting_bytes = initial or 0

        self._initial = starting_bytes
        self.downloaded = starting_bytes
        self._total = 0
        self.elapsed: float = 0

        self._start_time = now
        self._last_update = now
        # Parallel lists: sample timestamps and the byte count at each sample
        self._times = [now]
        self._downloaded = [starting_bytes]

        # Last cumulative size reported by each thread, keyed by thread ident
        self._thread_sizes: dict[int, int] = {}
        self._lock = threading.Lock()

        self.eta = SmoothValue(None, smoothing=0.9)
        self.speed = SmoothValue(0, smoothing=0.7)

    @property
    def total(self):
        """Expected total size, as last set or as raised by `_update`."""
        return self._total

    @total.setter
    def total(self, value: int | None):
        with self._lock:
            # A total smaller than what is already downloaded is clamped up
            below_progress = value is not None and value < self.downloaded
            self._total = self.downloaded if below_progress else value

    def thread_reset(self):
        """Forget the size last reported by the calling thread."""
        with self._lock:
            self._thread_sizes[threading.get_ident()] = 0

    def update(self, size: int | None):
        """Record the calling thread's cumulative `size`; falsy values are ignored."""
        if size:
            thread_id = threading.get_ident()
            with self._lock:
                previous = self._thread_sizes.get(thread_id, 0)
                self._thread_sizes[thread_id] = size
                self._update(size - previous)

    def _update(self, size: int):
        """Add `size` bytes and refresh elapsed time, speed and eta.

        Called by `update` while it holds the lock.
        """
        now = time.monotonic()

        self.downloaded += size
        self.elapsed = now - self._start_time
        if self.total is not None and self.downloaded > self.total:
            self._total = self.downloaded

        # Too soon after the previous sample: keep the counters, skip sampling
        if now < self._last_update + self.SAMPLING_RATE:
            return
        self._last_update = now

        self._times.append(now)
        self._downloaded.append(self.downloaded)

        # Discard samples that are older than the sampling window
        stale = bisect.bisect_left(self._times, now - self.SAMPLING_WINDOW)
        del self._times[:stale]
        del self._downloaded[:stale]

        # A single remaining sample is not enough to measure a rate
        if len(self._times) < 2:
            self.speed.reset()
            self.eta.reset()
            return

        window = now - self._times[0]
        if not window:
            return

        self.speed.set((self.downloaded - self._downloaded[0]) / window)
        if not (self.total and self.speed.value and self.elapsed > self.GRACE_PERIOD):
            self.eta.reset()
            return
        self.eta.set((self.total - self.downloaded) / self.speed.value)
 class SmoothValue:
    """Hold a raw value alongside an exponentially smoothed copy of it.

    `value` is always the most recent sample passed to `set`; `smooth` blends
    each new sample with the previous smoothed value, weighting the previous
    one by `smoothing`.
    """

    def __init__(self, initial: float | None, smoothing: float):
        self._smoothing = smoothing
        self._initial = initial
        self.value = initial
        self.smooth = initial

    def set(self, value: float):
        """Store `value` and fold it into the smoothed value."""
        previous = self.smooth
        self.value = value
        if previous is not None:
            # Blend the new sample with the history; with no history the
            # sample is taken over unchanged
            value = (1 - self._smoothing) * value + self._smoothing * previous
        self.smooth = value

    def reset(self):
        """Restore both the raw and the smoothed value to the initial value."""
        self.smooth = self._initial
        self.value = self._initial