Merge branch 'yt-dlp:master' into pr/live-sections

2024-12-02 08:28:21 +01:00 · 2024-05-10 13:52:35 -05:00 · 2024-05-10 13:52:35 -05:00 · 172dfbeaed
commit 172dfbeaed
parent cf96b24de6 98d71d8c5e
37 changed files with 676 additions and 326 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -12,6 +12,9 @@ on:
      unix:
        default: true
        type: boolean
+      linux_static:
+        default: true
+        type: boolean
      linux_arm:
        default: true
        type: boolean
@ -27,9 +30,6 @@ on:
      windows32:
        default: true
        type: boolean
-      meta_files:
-        default: true
-        type: boolean
      origin:
        required: false
        default: ''
@ -52,7 +52,11 @@ on:
        default: stable
        type: string
      unix:
-        description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip
+        description: yt-dlp, yt-dlp.tar.gz
+        default: true
+        type: boolean
+      linux_static:
+        description: yt-dlp_linux
        default: true
        type: boolean
      linux_arm:
@ -75,10 +79,6 @@ on:
        description: yt-dlp_x86.exe
        default: true
        type: boolean
-      meta_files:
-        description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
-        default: true
-        type: boolean
      origin:
        description: Origin
        required: false
@ -112,27 +112,9 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
-      - uses: conda-incubator/setup-miniconda@v3
-        with:
-          miniforge-variant: Mambaforge
-          use-mamba: true
-          channels: conda-forge
-          auto-update-conda: true
-          activate-environment: ""
-          auto-activate-base: false
      - name: Install Requirements
        run: |
          sudo apt -y install zip pandoc man sed
-          cat > ./requirements.txt << EOF
-          python=3.10.*
-          pyinstaller
-          brotli-python
-          EOF
-          python devscripts/install_deps.py --print \
-            --exclude brotli --exclude brotlicffi \
-            --include secretstorage >> ./requirements.txt
-          mamba create -n build --file ./requirements.txt
-
      - name: Prepare
        run: |
          python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
@ -141,30 +123,15 @@ jobs:
      - name: Build Unix platform-independent binary
        run: |
          make all tar
-      - name: Build Unix standalone binary
-        shell: bash -l {0}
-        run: |
-          unset LD_LIBRARY_PATH  # Harmful; set by setup-python
-          conda activate build
-          python -m bundle.pyinstaller --onedir
-          (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
-          python -m bundle.pyinstaller
-          mv ./dist/yt-dlp_linux ./yt-dlp_linux
-          mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
-
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
-          binaries=("yt-dlp" "yt-dlp_linux")
-          for binary in "${binaries[@]}"; do
-            chmod +x ./${binary}
-            cp ./${binary} ./${binary}_downgraded
-            version="$(./${binary} --version)"
-            ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
-            downgraded_version="$(./${binary}_downgraded --version)"
-            [[ "$version" != "$downgraded_version" ]]
-          done
-
+          chmod +x ./yt-dlp
+          cp ./yt-dlp ./yt-dlp_downgraded
+          version="$(./yt-dlp --version)"
+          ./yt-dlp_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(./yt-dlp_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
@ -172,8 +139,39 @@ jobs:
          path: |
            yt-dlp
            yt-dlp.tar.gz
-            yt-dlp_linux
-            yt-dlp_linux.zip
+          compression-level: 0
+
+  # Builds the yt-dlp_linux executable through the `static` service defined in
+  # bundle/docker/compose.yml, optionally checks `--update-to`, then uploads it.
+  linux_static:
+    needs: process
+    if: inputs.linux_static
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # channel/origin/version are exported so compose.yml can forward them into
+      # the container. ~/build is the host directory that compose.yml mounts at
+      # /build; the resulting file is re-owned to the runner user afterwards
+      # (presumably because the container creates it as a different user).
+      - name: Build static executable
+        env:
+          channel: ${{ inputs.channel }}
+          origin: ${{ needs.process.outputs.origin }}
+          version: ${{ inputs.version }}
+        run: |
+          mkdir ~/build
+          cd bundle/docker
+          docker compose up --build static
+          sudo chown "${USER}:docker" ~/build/yt-dlp_linux
+      # Runs `--update-to` on a copy of the binary and requires the copy to
+      # report a different version than the freshly built one afterwards.
+      - name: Verify --update-to
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          chmod +x ~/build/yt-dlp_linux
+          cp ~/build/yt-dlp_linux ~/build/yt-dlp_linux_downgraded
+          version="$(~/build/yt-dlp_linux --version)"
+          ~/build/yt-dlp_linux_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(~/build/yt-dlp_linux_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-bin-${{ github.job }}
+          path: |
+            ~/build/yt-dlp_linux
          compression-level: 0

  linux_arm:
@ -254,7 +252,7 @@ jobs:
          # We need to fuse our own universal2 wheels for curl_cffi
          python3 -m pip install -U --user delocate
          mkdir curl_cffi_whls curl_cffi_universal2
-          python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
+          python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
          for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
            python3 -m pip download \
              --only-binary=:all: \
@ -300,7 +298,7 @@ jobs:
  macos_legacy:
    needs: process
    if: inputs.macos_legacy
-    runs-on: macos-latest
+    runs-on: macos-12

    steps:
      - uses: actions/checkout@v4
@ -362,7 +360,7 @@ jobs:
      - name: Install Requirements
        run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
          python devscripts/install_deps.py -o --include build
-          python devscripts/install_deps.py --include py2exe --include curl_cffi
+          python devscripts/install_deps.py --include py2exe --include curl-cffi
          python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"

      - name: Prepare
@ -447,10 +445,11 @@ jobs:
          compression-level: 0

  meta_files:
-    if: inputs.meta_files && always() && !cancelled()
+    if: always() && !cancelled()
    needs:
      - process
      - unix
+      - linux_static
      - linux_arm
      - macos
      - macos_legacy
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@ -53,7 +53,7 @@ jobs:
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install test requirements
-      run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
+      run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
    - name: Run tests
      continue-on-error: False
      run: |
--- a/README.md
+++ b/README.md
@ -202,7 +202,7 @@ #### Impersonation
 The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. 

 * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
-  * Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
+  * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
  * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds


--- a/bundle/docker/compose.yml
+++ b/bundle/docker/compose.yml
@ -0,0 +1,10 @@
+# Docker Compose definition for building yt-dlp inside a container.
+services:
+  # Image is built from the ./static directory next to this file
+  static:
+    build: static
+    environment:
+      # Substituted from the environment of the `docker compose` invocation
+      channel: ${channel}
+      origin: ${origin}
+      version: ${version}
+    volumes:
+      # Host directory mounted at /build in the container
+      - ~/build:/build
+      # Repository root (two levels above this file) mounted at /yt-dlp
+      - ../..:/yt-dlp
--- a/bundle/docker/static/Dockerfile
+++ b/bundle/docker/static/Dockerfile
@ -0,0 +1,21 @@
+# Build image for the yt-dlp_linux executable: Alpine with PyInstaller and
+# staticx, each installed through pipx.
+FROM alpine:3.19 AS base
+
+RUN apk --update add --no-cache \
+        build-base \
+        python3 \
+        pipx \
+    ;
+
+RUN pipx install pyinstaller
+# Requires above step to prepare the shared venv
+RUN ~/.local/share/pipx/shared/bin/python -m pip install -U wheel
+# Installed ahead of staticx (presumably its build/runtime requirements)
+RUN apk --update add --no-cache \
+        scons \
+        patchelf \
+        binutils \
+    ;
+RUN pipx install staticx
+
+# compose.yml mounts the repository at this path when the container runs
+WORKDIR /yt-dlp
+COPY entrypoint.sh /entrypoint.sh
+# Exec form: the script is started directly rather than through `/bin/sh -c`,
+# so it receives signals sent to the container itself
+ENTRYPOINT ["/entrypoint.sh"]
--- a/bundle/docker/static/entrypoint.sh
+++ b/bundle/docker/static/entrypoint.sh
@ -0,0 +1,13 @@
+#!/bin/ash
+# Container entrypoint: bundles yt-dlp with PyInstaller, then passes the
+# result through staticx, writing the output to /build/yt-dlp_linux.
+# Reads ${channel}, ${origin} and ${version} from the environment.
+
+# Stop at the first failing command
+set -e
+
+# Stage 1: everything up to `deactivate` runs inside pipx's pyinstaller venv
+source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
+python -m devscripts.install_deps --include secretstorage
+python -m devscripts.make_lazy_extractors
+python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
+python -m bundle.pyinstaller
+deactivate
+
+# Stage 2: staticx takes the PyInstaller output under /yt-dlp/dist as input
+source ~/.local/share/pipx/venvs/staticx/bin/activate
+staticx /yt-dlp/dist/yt-dlp_linux /build/yt-dlp_linux
+deactivate
--- a/pyproject.toml
+++ b/pyproject.toml
@ -53,7 +53,7 @@ dependencies = [

 [project.optional-dependencies]
 default = []
-curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
+curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
 secretstorage = [
    "cffi",
    "secretstorage",
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -1906,6 +1906,15 @@ def test_response_with_expected_status_returns_content(self):
            expected_status=TEAPOT_RESPONSE_STATUS)
        self.assertEqual(content, TEAPOT_RESPONSE_BODY)

+    def test_search_nextjs_data(self):
+        """Check `_search_nextjs_data` parsing and its `fatal`/`default` fallbacks"""
+        data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
+        self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
+        self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
+        self.assertIsNone(self.ie._search_nextjs_data('', None, default=None))
+        self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
+        # The legacy string default is expected to raise here, so there is no
+        # return value to compare: wrapping the call in assertEqual would be dead code
+        with self.assertRaises(DeprecationWarning):
+            self.ie._search_nextjs_data('', None, default='{}')
+

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_networking.py
+++ b/test/test_networking.py
@ -785,6 +785,25 @@ def test_supported_impersonate_targets(self, handler):
                assert res.status == 200
                assert std_headers['user-agent'].lower() not in res.read().decode().lower()

+    def test_response_extensions(self, handler):
+        """For each supported target, the response's `impersonate` extension must
+        equal what `rh._get_request_target(request)` returns for that request"""
+        with handler() as rh:
+            for target in rh.supported_targets:
+                request = Request(
+                    f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
+                res = validate_and_send(rh, request)
+                assert res.extensions['impersonate'] == rh._get_request_target(request)
+
+    def test_http_error_response_extensions(self, handler):
+        """For each supported target, the response attached to the raised HTTPError
+        must carry an `impersonate` extension equal to `rh._get_request_target(request)`"""
+        with handler() as rh:
+            for target in rh.supported_targets:
+                request = Request(
+                    f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
+                try:
+                    validate_and_send(rh, request)
+                except HTTPError as e:
+                    res = e.response
+                else:
+                    # Otherwise `res` would be unbound (first target) or left over from
+                    # the previous target, and the assertion below would check the wrong response
+                    raise AssertionError(f'No HTTPError raised for impersonate target {target}')
+                assert res.extensions['impersonate'] == rh._get_request_target(request)
+

 class TestRequestHandlerMisc:
    """Misc generic tests for request handlers, not related to request or validation testing"""
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -2064,7 +2064,22 @@ def test_extract_basic_auth(self):
        assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

    @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
-    def test_Popen_windows_escaping(self):
+    def test_windows_escaping(self):
+        tests = [
+            'test"&',
+            '%CMDCMDLINE:~-1%&',
+            'a\nb',
+            '"',
+            '\\',
+            '!',
+            '^!',
+            'a \\ b',
+            'a \\" b',
+            'a \\ b\\',
+            # We replace \r with \n
+            ('a\r\ra', 'a\n\na'),
+        ]
+
        def run_shell(args):
            stdout, stderr, error = Popen.run(
                args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@ -2072,15 +2087,15 @@ def run_shell(args):
            assert not error
            return stdout

-        # Test escaping
-        assert run_shell(['echo', 'test"&']) == '"test""&"\n'
-        assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n'
-        assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n'
-        assert run_shell(['echo', '"']) == '""""\n'
-        assert run_shell(['echo', '\\']) == '\\\n'
-        # Test if delayed expansion is disabled
-        assert run_shell(['echo', '^!']) == '"^!"\n'
-        assert run_shell('echo "^!"') == '"^!"\n'
+        for argument in tests:
+            if isinstance(argument, str):
+                expected = argument
+            else:
+                argument, expected = argument
+
+            args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+            assert run_shell(args) == expected
+            assert run_shell(shell_quote(args, shell=True)) == expected


 if __name__ == '__main__':
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -2141,6 +2141,11 @@ def _filter(f):

    def _check_formats(self, formats):
        for f in formats:
+            working = f.get('__working')
+            if working is not None:
+                if working:
+                    yield f
+                continue
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
@ -2157,33 +2162,44 @@ def _check_formats(self, formats):
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+            f['__working'] = success
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

+    def _select_formats(self, formats, selector):
+        """Call `selector` with a context built from `formats` and return its results as a list
+
+        The context holds the formats themselves plus two flags derived from
+        each format's `acodec`/`vcodec` fields.
+        """
+        def every_format_lacks(codec_key):
+            return all(fmt.get(codec_key) == 'none' for fmt in formats)
+
+        # At least one format where neither codec is 'none'
+        has_merged = any(
+            fmt.get('acodec') != 'none' and fmt.get('vcodec') != 'none' for fmt in formats)
+        context = {
+            'formats': formats,
+            'has_merged_format': has_merged,
+            # No formats with video OR no formats with audio
+            'incomplete_formats': every_format_lacks('vcodec') or every_format_lacks('acodec'),
+        }
+        return list(selector(context))
+
    def _default_format_spec(self, info_dict, download=True):
+        download = download and not self.params.get('simulate')
+        prefer_best = download and (
+            self.params['outtmpl']['default'] == '-'
+            or info_dict.get('is_live') and not self.params.get('live_from_start'))

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

-        prefer_best = (
-            not self.params.get('simulate')
-            and download
-            and (
-                not can_merge()
-                or info_dict.get('is_live') and not self.params.get('live_from_start')
-                or self.params['outtmpl']['default'] == '-'))
-        compat = (
-            prefer_best
-            or self.params.get('allow_multiple_audio_streams', False)
-            or 'format-spec' in self.params['compat_opts'])
+        if not prefer_best and download and not can_merge():
+            prefer_best = True
+            formats = self._get_formats(info_dict)
+            evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
+            if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
+                self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
+                                    'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')

-        return (
-            'best/bestvideo+bestaudio' if prefer_best
-            else 'bestvideo*+bestaudio/best' if not compat
-            else 'bestvideo+bestaudio/best')
+        compat = (self.params.get('allow_multiple_audio_streams')
+                  or 'format-spec' in self.params['compat_opts'])
+
+        return ('best/bestvideo+bestaudio' if prefer_best
+                else 'bestvideo+bestaudio/best' if compat
+                else 'bestvideo*+bestaudio/best')

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
@ -2933,12 +2949,7 @@ def is_wellformed(f):
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

-            formats_to_download = list(format_selector({
-                'formats': formats,
-                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
-                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
-                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
-            }))
+            formats_to_download = self._select_formats(formats, format_selector)
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -387,7 +387,11 @@
    ComedyCentralIE,
    ComedyCentralTVIE,
 )
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonmistakes import (
+    BlobIE,
+    CommonMistakesIE,
+    UnicodeBOMIE,
+)
 from .commonprotocols import (
    MmsIE,
    RtmpIE,
--- a/yt_dlp/extractor/asobistage.py
+++ b/yt_dlp/extractor/asobistage.py
@ -105,7 +105,7 @@ def _real_extract(self, url):
        video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
        webpage = self._download_webpage(url, video_id)
        event_data = traverse_obj(
-            self._search_nextjs_data(webpage, video_id, default='{}'),
+            self._search_nextjs_data(webpage, video_id, default={}),
            ('props', 'pageProps', 'eventCMSData', {
                'title': ('event_name', {str}),
                'thumbnail': ('event_thumbnail_image', {url_or_none}),
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -93,11 +93,11 @@ def extract_formats(self, play_info):

        return formats

-    def _download_playinfo(self, video_id, cid):
+    def _download_playinfo(self, video_id, cid, headers=None):
        return self._download_json(
            'https://api.bilibili.com/x/player/playurl', video_id,
            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
-            note=f'Downloading video formats for cid {cid}')['data']
+            note=f'Downloading video formats for cid {cid}', headers=headers)['data']

    def json2srt(self, json_data):
        srt_data = ''
@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage, urlh = self._download_webpage_handle(url, video_id)
+        headers = self.geo_verification_headers()
+        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

@ -531,7 +532,7 @@ def _real_extract(self, url):
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
-                note='Extracting videos in anthology'),
+                note='Extracting videos in anthology', headers=headers),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

@ -552,7 +553,7 @@ def _real_extract(self, url):

        festival_info = {}
        if is_festival:
-            play_info = self._download_playinfo(video_id, cid)
+            play_info = self._download_playinfo(video_id, cid, headers=headers)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):

    def _real_extract(self, url):
        episode_id = self._match_id(url)
-        webpage = self._download_webpage(url, episode_id)
+        headers = self.geo_verification_headers()
+        webpage = self._download_webpage(url, episode_id, headers=headers)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览，大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

-        headers = {'Referer': url, **self.geo_verification_headers()}
+        headers['Referer'] = url
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
@ -724,7 +726,7 @@ def _real_extract(self, url):
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
-            'http_headers': headers,
+            'http_headers': {'Referer': url},
        }


@ -1049,9 +1051,10 @@ def fetch_page(page_idx):
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
-            if response['code'] == -401:
+            if response['code'] in (-352, -401):
                raise ExtractorError(
-                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+                    f'Request is blocked by server ({-response["code"]}), '
+                    'please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
--- a/yt_dlp/extractor/boosty.py
+++ b/yt_dlp/extractor/boosty.py
@ -1,7 +1,11 @@
+import json
+import urllib.parse
+
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
    ExtractorError,
+    bug_reports_message,
    int_or_none,
    qualities,
    str_or_none,
@ -162,9 +166,19 @@ def _extract_formats(self, player_urls, video_id):

    def _real_extract(self, url):
        user, post_id = self._match_valid_url(url).group('user', 'post_id')
+
+        auth_headers = {}
+        auth_cookie = self._get_cookies('https://boosty.to/').get('auth')
+        if auth_cookie is not None:
+            try:
+                auth_data = json.loads(urllib.parse.unquote(auth_cookie.value))
+                auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
+            except (json.JSONDecodeError, KeyError):
+                self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}')
+
        post = self._download_json(
            f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
-            note='Downloading post data', errnote='Unable to download post data')
+            note='Downloading post data', errnote='Unable to download post data', headers=auth_headers)

        post_title = post.get('title')
        if not post_title:
@ -202,7 +216,9 @@ def _real_extract(self, url):
                        'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
                    }, get_all=False)})

-        if not entries:
+        if not entries and not post.get('hasAccess'):
+            self.raise_login_required('This post requires a subscription', metadata_available=True)
+        elif not entries:
            raise ExtractorError('No videos found', expected=True)
        if len(entries) == 1:
            return entries[0]
--- a/yt_dlp/extractor/canalalpha.py
+++ b/yt_dlp/extractor/canalalpha.py
@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor):
            'id': '24484',
            'ext': 'mp4',
            'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
-            'description': 'md5:3de3f151180684621e85be7c10e4e613',
+            'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
            'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
            'upload_date': '20211026',
            'duration': 360,
@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor):
            'duration': 360,
        },
        'params': {'skip_download': True}
+    }, {
+        'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
+        'info_dict': {
+            'id': '33500',
+            'ext': 'mp4',
+            'title': 'Encore des mesures d\'économie dans le Jura',
+            'description': 'md5:938b5b556592f2d1b9ab150268082a80',
+            'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
+            'upload_date': '20240411',
+            'duration': 105,
+        },
    }]

    def _real_extract(self, url):
-        id = self._match_id(url)
-        webpage = self._download_webpage(url, id)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
        data_json = self._parse_json(self._search_regex(
            r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
-            webpage, 'data_json'), id)['1']['data']['data']
+            webpage, 'data_json'), video_id)['1']['data']['data']
        manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
        subtitles = {}
        formats = [{
@ -75,15 +86,17 @@ def _real_extract(self, url):
            'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
        } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
        if manifests.get('hls'):
-            m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
-            formats.extend(m3u8_frmts)
-            subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                manifests['hls'], video_id, m3u8_id='hls', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
        if manifests.get('dash'):
-            dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
-            formats.extend(dash_frmts)
-            subtitles = self._merge_subtitles(subtitles, dash_subs)
+            fmts, subs = self._extract_mpd_formats_and_subtitles(
+                manifests['dash'], video_id, mpd_id='dash', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
        return {
-            'id': id,
+            'id': video_id,
            'title': data_json.get('title').strip(),
            'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
            'thumbnail': data_json.get('poster'),
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@ -151,7 +151,7 @@ def _real_extract(self, url):

 class CBCPlayerIE(InfoExtractor):
    IE_NAME = 'cbc.ca:player'
-    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
+    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
    _TESTS = [{
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'md5': '64d25f841ddf4ddb28a235338af32e2c',
@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
            'location': 'Canada',
            'media_type': 'Full Program',
        },
+    }, {
+        'url': 'https://www.cbc.ca/player/play/video/1.7194274',
+        'md5': '188b96cf6bdcb2540e178a6caa957128',
+        'info_dict': {
+            'id': '2334524995812',
+            'ext': 'mp4',
+            'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
+            'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
+            'timestamp': 1714788791,
+            'duration': 77.678,
+            'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
+            'uploader': 'CBCC-NEW',
+            'chapters': 'count:0',
+            'upload_date': '20240504',
+            'categories': 'count:3',
+            'series': 'The National',
+            'tags': 'count:15',
+            'creators': ['encoder'],
+            'location': 'Canada',
+            'media_type': 'Excerpt',
+        },
    }, {
        'url': 'cbcplayer:1.7159484',
        'only_matching': True,
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -1738,12 +1738,16 @@ def traverse_json_ld(json_ld, at_top_level=True):
        traverse_json_ld(json_ld)
        return filter_dict(info)

-    def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', fatal=fatal, **kw),
-            video_id, transform_source=transform_source, fatal=fatal)
+    def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+        """Search `webpage` for the `__NEXT_DATA__` script tag and return its JSON via `_search_json`
+
+        Passing any `default` makes the search non-fatal. The string `'{}'` is
+        still accepted as a default but is deprecated in favour of `{}`.
+        """
+        uses_legacy_default = default == '{}'
+        if uses_legacy_default:
+            self._downloader.deprecation_warning(
+                "using `default='{}'` is deprecated, use `default={}` instead")
+            default = {}
+
+        has_default = default is not NO_DEFAULT
+        return self._search_json(
+            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+            video_id, end_pattern='</script>',
+            fatal=False if has_default else fatal, default=default, **kw)

    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
--- a/yt_dlp/extractor/commonmistakes.py
+++ b/yt_dlp/extractor/commonmistakes.py
@ -40,3 +40,19 @@ def _real_extract(self, url):
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...' % real_url)
        return self.url_result(real_url)
+
+
+# Matches `blob:` URLs only to explain why they cannot be downloaded
+class BlobIE(InfoExtractor):
+    IE_DESC = False
+    _VALID_URL = r'blob:'
+
+    _TESTS = [{
+        'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        # Never returns a result: always raises an expected error with an explanation
+        explanation = ' '.join((
+            "You've asked yt-dlp to download a blob URL.",
+            'A blob URL exists only locally in your browser.',
+            'It is not possible for yt-dlp to access it.',
+        ))
+        raise ExtractorError(explanation, expected=True)
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@ -24,11 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
+    _REFRESH_TOKEN = None
    _AUTH_HEADERS = None
+    _AUTH_EXPIRY = None
    _API_ENDPOINT = None
-    _BASIC_AUTH = None
+    _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
+        't-kdgp2h8c3jub8fn0fq',
+        'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
+    )).encode()).decode()
    _IS_PREMIUM = None
-    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
@ -43,69 +47,78 @@ class CrunchyrollBaseIE(InfoExtractor):
        'hi': 'hi-IN',
    }

-    @property
-    def is_logged_in(self):
-        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
+    def _set_auth_info(self, response):
+        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
+        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
+        CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
+
+    def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
+        try:
+            return self._download_json(
+                f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
+                headers=headers, data=urlencode_postdata(data), impersonate=True)
+        except ExtractorError as error:
+            if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
+                raise
+            if target := error.cause.response.extensions.get('impersonate'):
+                raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
+            raise ExtractorError(
+                'Request blocked by Cloudflare. '
+                'Install the required impersonation dependency if possible, '
+                'or else navigate to Crunchyroll in your browser, '
+                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+                'and your browser\'s User-Agent (with --user-agent)', expected=True)

    def _perform_login(self, username, password):
-        if self.is_logged_in:
+        if not CrunchyrollBaseIE._REFRESH_TOKEN:
+            CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
            return

-        upsell_response = self._download_json(
-            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
-            query={
-                'sess_id': 1,
-                'device_id': 'whatvalueshouldbeforweb',
-                'device_type': 'com.crunchyroll.static',
-                'access_token': 'giKq5eY27ny3cqz',
-                'referer': f'{self._BASE_URL}/welcome/login'
-            })
-        if upsell_response['code'] != 'ok':
-            raise ExtractorError('Could not get session id')
-        session_id = upsell_response['data']['session_id']
-
-        login_response = self._download_json(
-            f'{self._API_BASE}/login.1.json', None, 'Logging in',
-            data=urlencode_postdata({
-                'account': username,
-                'password': password,
-                'session_id': session_id
-            }))
-        if login_response['code'] != 'ok':
-            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
-        if not self.is_logged_in:
-            raise ExtractorError('Login succeeded but did not set etp_rt cookie')
-
-    def _update_auth(self):
-        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
-            return
-
-        if not CrunchyrollBaseIE._BASIC_AUTH:
-            cx_api_param = self._CLIENT_ID[self.is_logged_in]
-            self.write_debug(f'Using cxApiParam={cx_api_param}')
-            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
-        auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
-        if self.is_logged_in:
-            grant_type = 'etp_rt_cookie'
-        else:
-            grant_type = 'client_id'
-            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
        try:
-            auth_response = self._download_json(
-                f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
-                headers=auth_headers, data=f'grant_type={grant_type}'.encode())
+            login_response = self._request_token(
+                headers={'Authorization': self._BASIC_AUTH}, data={
+                    'username': username,
+                    'password': password,
+                    'grant_type': 'password',
+                    'scope': 'offline_access',
+                }, note='Logging in', errnote='Failed to log in')
        except ExtractorError as error:
-            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
-                raise ExtractorError(
-                    'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
-                    'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
-                    'and your browser\'s User-Agent (with --user-agent)', expected=True)
+            if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+                raise ExtractorError('Invalid username and/or password', expected=True)
            raise

-        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
-        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
-        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+        CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
+        self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
+        self._set_auth_info(login_response)
+
+    def _update_auth(self):
+        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
+            return
+
+        auth_headers = {'Authorization': self._BASIC_AUTH}
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
+            data = {
+                'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
+                'grant_type': 'refresh_token',
+                'scope': 'offline_access',
+            }
+        else:
+            data = {'grant_type': 'client_id'}
+            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
+        try:
+            auth_response = self._request_token(auth_headers, data)
+        except ExtractorError as error:
+            username, password = self._get_login_info()
+            if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
+                raise
+            self.to_screen('Refresh token has expired. Re-logging in')
+            CrunchyrollBaseIE._REFRESH_TOKEN = None
+            self.cache.store(self._NETRC_MACHINE, username, None)
+            self._perform_login(username, password)
+            return
+
+        self._set_auth_info(auth_response)

    def _locale_from_language(self, language):
        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
@ -168,7 +181,8 @@ def _extract_stream(self, identifier, display_id=None):
        self._update_auth()
        stream_response = self._download_json(
            f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
-            display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
+            display_id, note='Downloading stream info', errnote='Failed to download stream info',
+            headers=CrunchyrollBaseIE._AUTH_HEADERS)

        available_formats = {'': ('', '', stream_response['url'])}
        for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
@ -383,11 +397,12 @@ def entries():

        if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
            message = f'This {object_type} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
-        result['formats'], result['subtitles'] = self._extract_stream(internal_id)
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], result['subtitles'] = self._extract_stream(internal_id)

        result['chapters'] = self._extract_chapters(internal_id)

@ -573,14 +588,16 @@ def _real_extract(self, url):
        if not response:
            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

+        result = self._transform_music_response(response)
+
        if not self._IS_PREMIUM and response.get('isPremiumOnly'):
            message = f'This {response.get("type") or "media"} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
-        result = self._transform_music_response(response)
-        result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)

        return result

--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@ -560,7 +560,7 @@ def extract_from_jsmods_instances(js_data):
                    js_data, lambda x: x['jsmods']['instances'], list) or [])

        def extract_dash_manifest(video, formats):
-            dash_manifest = video.get('dash_manifest')
+            dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
--- a/yt_dlp/extractor/mixch.py
+++ b/yt_dlp/extractor/mixch.py
@ -1,6 +1,12 @@
 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
-from ..utils import ExtractorError, UserNotLive, int_or_none, url_or_none
+from ..utils import (
+    ExtractorError,
+    UserNotLive,
+    int_or_none,
+    str_or_none,
+    url_or_none,
+)
 from ..utils.traversal import traverse_obj


@ -9,17 +15,20 @@ class MixchIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'

    _TESTS = [{
-        'url': 'https://mixch.tv/u/16236849/live',
+        'url': 'https://mixch.tv/u/16943797/live',
        'skip': 'don\'t know if this live persists',
        'info_dict': {
-            'id': '16236849',
-            'title': '24配信シェア⭕️投票🙏💦',
-            'comment_count': 13145,
-            'view_count': 28348,
-            'timestamp': 1636189377,
-            'uploader': '🦥伊咲👶🏻#フレアワ',
-            'uploader_id': '16236849',
-        }
+            'id': '16943797',
+            'ext': 'mp4',
+            'title': '#EntView #カリナ #セブチ 2024-05-05 06:58',
+            'comment_count': int,
+            'view_count': int,
+            'timestamp': 1714726805,
+            'uploader': 'Ent.View K-news🎶💕',
+            'uploader_id': '16943797',
+            'live_status': 'is_live',
+            'upload_date': '20240503',
+        },
    }, {
        'url': 'https://mixch.tv/u/16137876/live',
        'only_matching': True,
@ -48,8 +57,20 @@ def _real_extract(self, url):
                'protocol': 'm3u8',
            }],
            'is_live': True,
+            '__post_extractor': self.extract_comments(video_id),
        }

+    def _get_comments(self, video_id):
+        yield from traverse_obj(self._download_json(
+            f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id,
+            note='Downloading comments', errnote='Failed to download comments'), (..., {
+                'author': ('name', {str}),
+                'author_id': ('user_id', {str_or_none}),
+                'id': ('message_id', {str}, {lambda x: x or None}),
+                'text': ('body', {str}),
+                'timestamp': ('created', {int}),
+            }))
+

 class MixchArchiveIE(InfoExtractor):
    IE_NAME = 'mixch:archive'
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@ -561,7 +561,8 @@ def _real_extract(self, url):
            'timestamp': ('createTime', {self.kilo_or_none}),
        })

-        if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
+        if not self._yes_playlist(
+                info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
            formats = self.extract_formats(info['mainSong'])

            return {
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@ -1,8 +1,8 @@
 import itertools
+import urllib.parse

 from .common import InfoExtractor
 from .vimeo import VimeoIE
-from ..compat import compat_urllib_parse_unquote
 from ..networking.exceptions import HTTPError
 from ..utils import (
    KNOWN_EXTENSIONS,
@ -14,7 +14,6 @@
    parse_iso8601,
    str_or_none,
    traverse_obj,
-    try_get,
    url_or_none,
    urljoin,
 )
@ -199,7 +198,50 @@ class PatreonIE(PatreonBaseIE):
            'channel_id': '2147162',
            'uploader_url': 'https://www.patreon.com/yaboyroshi',
        },
+    }, {
+        # NSFW vimeo embed URL
+        'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
+        'info_dict': {
+            'id': '902250943',
+            'ext': 'mp4',
+            'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
+            'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
+            'uploader': 'Npickyeonhwa',
+            'uploader_id': '90574422',
+            'uploader_url': 'https://www.patreon.com/Yeonhwa726',
+            'channel_id': '10237902',
+            'channel_url': 'https://www.patreon.com/Yeonhwa726',
+            'duration': 70,
+            'timestamp': 1705150153,
+            'upload_date': '20240113',
+            'comment_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.+',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        # multiple attachments/embeds
+        'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
+        'playlist_count': 3,
+        'info_dict': {
+            'id': '100601977',
+            'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
+            'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
+            'uploader': 'Bradley Hall',
+            'uploader_id': '24401883',
+            'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
+            'channel_id': '3193932',
+            'channel_url': 'https://www.patreon.com/bradleyhallguitar',
+            'channel_follower_count': int,
+            'timestamp': 1710777855,
+            'upload_date': '20240318',
+            'like_count': int,
+            'comment_count': int,
+            'thumbnail': r're:^https?://.+',
+        },
+        'skip': 'Patron-only content',
    }]
+    _RETURN_TYPE = 'video'

    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -214,95 +256,108 @@ def _real_extract(self, url):
                'include': 'audio,user,user_defined_tags,campaign,attachments_media',
            })
        attributes = post['data']['attributes']
-        title = attributes['title'].strip()
-        image = attributes.get('image') or {}
-        info = {
-            'id': video_id,
-            'title': title,
-            'description': clean_html(attributes.get('content')),
-            'thumbnail': image.get('large_url') or image.get('url'),
-            'timestamp': parse_iso8601(attributes.get('published_at')),
-            'like_count': int_or_none(attributes.get('like_count')),
-            'comment_count': int_or_none(attributes.get('comment_count')),
-        }
-        can_view_post = traverse_obj(attributes, 'current_user_can_view')
-        if can_view_post and info['comment_count']:
-            info['__post_extractor'] = self.extract_comments(video_id)
+        info = traverse_obj(attributes, {
+            'title': ('title', {str.strip}),
+            'description': ('content', {clean_html}),
+            'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
+            'timestamp': ('published_at', {parse_iso8601}),
+            'like_count': ('like_count', {int_or_none}),
+            'comment_count': ('comment_count', {int_or_none}),
+        })

-        for i in post.get('included', []):
-            i_type = i.get('type')
-            if i_type == 'media':
-                media_attributes = i.get('attributes') or {}
-                download_url = media_attributes.get('download_url')
+        entries = []
+        idx = 0
+        for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
+            include_type = include['type']
+            if include_type == 'media':
+                media_attributes = traverse_obj(include, ('attributes', {dict})) or {}
+                download_url = url_or_none(media_attributes.get('download_url'))
                ext = mimetype2ext(media_attributes.get('mimetype'))

                # if size_bytes is None, this media file is likely unavailable
                # See: https://github.com/yt-dlp/yt-dlp/issues/4608
                size_bytes = int_or_none(media_attributes.get('size_bytes'))
                if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
-                    # XXX: what happens if there are multiple attachments?
-                    return {
-                        **info,
+                    idx += 1
+                    entries.append({
+                        'id': f'{video_id}-{idx}',
                        'ext': ext,
                        'filesize': size_bytes,
                        'url': download_url,
-                    }
-            elif i_type == 'user':
-                user_attributes = i.get('attributes')
-                if user_attributes:
-                    info.update({
-                        'uploader': user_attributes.get('full_name'),
-                        'uploader_id': str_or_none(i.get('id')),
-                        'uploader_url': user_attributes.get('url'),
                    })

-            elif i_type == 'post_tag':
-                info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
+            elif include_type == 'user':
+                info.update(traverse_obj(include, {
+                    'uploader': ('attributes', 'full_name', {str}),
+                    'uploader_id': ('id', {str_or_none}),
+                    'uploader_url': ('attributes', 'url', {url_or_none}),
+                }))

-            elif i_type == 'campaign':
-                info.update({
-                    'channel': traverse_obj(i, ('attributes', 'title')),
-                    'channel_id': str_or_none(i.get('id')),
-                    'channel_url': traverse_obj(i, ('attributes', 'url')),
-                    'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
-                })
+            elif include_type == 'post_tag':
+                if post_tag := traverse_obj(include, ('attributes', 'value', {str})):
+                    info.setdefault('tags', []).append(post_tag)
+
+            elif include_type == 'campaign':
+                info.update(traverse_obj(include, {
+                    'channel': ('attributes', 'title', {str}),
+                    'channel_id': ('id', {str_or_none}),
+                    'channel_url': ('attributes', 'url', {url_or_none}),
+                    'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
+                }))

        # handle Vimeo embeds
-        if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
-            embed_html = try_get(attributes, lambda x: x['embed']['html'])
-            v_url = url_or_none(compat_urllib_parse_unquote(
-                self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
-            if v_url:
-                v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
-                if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
-                    return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
+        if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
+            v_url = urllib.parse.unquote(self._html_search_regex(
+                r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
+                traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
+            if url_or_none(v_url) and self._request_webpage(
+                    v_url, video_id, 'Checking Vimeo embed URL',
+                    headers={'Referer': 'https://patreon.com/'},
+                    fatal=False, errnote=False):
+                entries.append(self.url_result(
+                    VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+                    VimeoIE, url_transparent=True))

-        embed_url = try_get(attributes, lambda x: x['embed']['url'])
+        embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
        if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
-            return self.url_result(embed_url, **info)
+            entries.append(self.url_result(embed_url))

-        post_file = traverse_obj(attributes, 'post_file')
+        post_file = traverse_obj(attributes, ('post_file', {dict}))
        if post_file:
            name = post_file.get('name')
            ext = determine_ext(name)
            if ext in KNOWN_EXTENSIONS:
-                return {
-                    **info,
+                entries.append({
+                    'id': video_id,
                    'ext': ext,
                    'url': post_file['url'],
-                }
+                })
            elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
-                return {
-                    **info,
+                entries.append({
+                    'id': video_id,
                    'formats': formats,
                    'subtitles': subtitles,
-                }
+                })

-        if can_view_post is False:
+        can_view_post = traverse_obj(attributes, 'current_user_can_view')
+        comments = None
+        if can_view_post and info.get('comment_count'):
+            comments = self.extract_comments(video_id)
+
+        if not entries and can_view_post is False:
            self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
-        else:
+        elif not entries:
            self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
+        elif len(entries) == 1:
+            info.update(entries[0])
+        else:
+            for entry in entries:
+                entry.update(info)
+            return self.playlist_result(entries, video_id, **info, __post_extractor=comments)
+
+        info['id'] = video_id
+        info['__post_extractor'] = comments
        return info

    def _get_comments(self, post_id):
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@ -361,7 +361,7 @@ def extract_count(key):
            'like_count': extract_count('favoritings') or extract_count('likes'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
-            'genre': info.get('genre'),
+            'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
            'formats': formats if not extract_flat else None
        }

@ -395,10 +395,10 @@ class SoundcloudIE(SoundcloudBaseIE):
    _TESTS = [
        {
            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
-            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+            'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
            'info_dict': {
                'id': '62986583',
-                'ext': 'mp3',
+                'ext': 'opus',
                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
                'uploader': 'E.T. ExTerrestrial Music',
@ -411,6 +411,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
+                'uploader_url': 'https://soundcloud.com/ethmusic',
+                'genres': [],
            }
        },
        # geo-restricted
@ -418,7 +421,7 @@ class SoundcloudIE(SoundcloudBaseIE):
            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            'info_dict': {
                'id': '47127627',
-                'ext': 'mp3',
+                'ext': 'opus',
                'title': 'Goldrushed',
                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
@ -431,6 +434,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/the-concept-band',
+                'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
+                'genres': ['Alternative'],
            },
        },
        # private link
@ -452,6 +458,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/jaimemf',
+                'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+                'genres': ['youtubedl'],
            },
        },
        # private link (alt format)
@ -473,6 +482,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/jaimemf',
+                'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+                'genres': ['youtubedl'],
            },
        },
        # downloadable song
@ -482,6 +494,21 @@ class SoundcloudIE(SoundcloudBaseIE):
            'info_dict': {
                'id': '343609555',
                'ext': 'wav',
+                'title': 'The Following',
+                'description': '',
+                'uploader': '80M',
+                'uploader_id': '312384765',
+                'uploader_url': 'https://soundcloud.com/the80m',
+                'upload_date': '20170922',
+                'timestamp': 1506120436,
+                'duration': 397.228,
+                'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
+                'license': 'all-rights-reserved',
+                'like_count': int,
+                'comment_count': int,
+                'repost_count': int,
+                'view_count': int,
+                'genres': ['Dance & EDM'],
            },
        },
        # private link, downloadable format
@ -503,6 +530,9 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
+                'uploader_url': 'https://soundcloud.com/oriuplift',
+                'genres': ['Trance'],
            },
        },
        # no album art, use avatar pic for thumbnail
@ -525,6 +555,8 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'uploader_url': 'https://soundcloud.com/garyvee',
+                'genres': [],
            },
            'params': {
                'skip_download': True,
@ -532,13 +564,13 @@ class SoundcloudIE(SoundcloudBaseIE):
        },
        {
            'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
-            'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+            'md5': '8227c3473a4264df6b02ad7e5b7527ac',
            'info_dict': {
                'id': '583011102',
-                'ext': 'mp3',
+                'ext': 'opus',
                'title': 'Mezzo Valzer',
-                'description': 'md5:4138d582f81866a530317bae316e8b61',
-                'uploader': 'Micronie',
+                'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
+                'uploader': 'Giovanni Sarani',
                'uploader_id': '3352531',
                'timestamp': 1551394171,
                'upload_date': '20190228',
@ -549,6 +581,8 @@ class SoundcloudIE(SoundcloudBaseIE):
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
+                'genres': ['Piano'],
+                'uploader_url': 'https://soundcloud.com/giovannisarani',
            },
        },
        {
--- a/yt_dlp/extractor/stacommu.py
+++ b/yt_dlp/extractor/stacommu.py
@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):


 class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
    IE_NAME = 'theatercomplextown:vod'
    _TESTS = [{
        'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
    }, {
        'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
        'only_matching': True,
+    }, {
+        'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+        'only_matching': True,
    }]

    _API_PATH = 'videoEpisodes'
@ -204,7 +207,7 @@ def _real_extract(self, url):


 class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
    IE_NAME = 'theatercomplextown:ppv'
    _TESTS = [{
        'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
    }, {
        'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
        'only_matching': True,
+    }, {
+        'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
+        'only_matching': True,
    }]

    _API_PATH = 'events'
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@ -41,7 +41,7 @@ def _real_extract(self, url):
        ptype, video_id = self._match_valid_url(url).groups()

        webpage = self._download_webpage(url, video_id, fatal=False) or ''
-        props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
+        props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
        player_api_cache = try_get(
            props, lambda x: x['initialReduxState']['playerApiCache']) or {}

--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@ -776,7 +776,7 @@ def _real_extract(self, url):
            status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
            video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))

-        elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
+        elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
            self.write_debug('Found next.js data')
            status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
            video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
--- a/yt_dlp/extractor/tva.py
+++ b/yt_dlp/extractor/tva.py
@ -1,10 +1,9 @@
+import functools
+import re
+
 from .common import InfoExtractor
-from ..utils import (
-    float_or_none,
-    int_or_none,
-    smuggle_url,
-    strip_or_none,
-)
+from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
+from ..utils.traversal import traverse_obj


 class TVAIE(InfoExtractor):
@ -49,11 +48,20 @@ class QubIE(InfoExtractor):
        'info_dict': {
            'id': '6084352463001',
            'ext': 'mp4',
-            'title': 'Épisode 01',
+            'title': 'Ép 01. Mon dernier jour',
            'uploader_id': '5481942443001',
            'upload_date': '20190907',
            'timestamp': 1567899756,
            'description': 'md5:9c0d7fbb90939420c651fd977df90145',
+            'thumbnail': r're:https://.+\.jpg',
+            'episode': 'Ép 01. Mon dernier jour',
+            'episode_number': 1,
+            'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
+            'duration': 2625.963,
+            'season': 'Season 1',
+            'season_number': 1,
+            'series': 'Alerte Amber',
+            'channel': 'TVA',
        },
    }, {
        'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
@ -64,22 +72,24 @@ class QubIE(InfoExtractor):

    def _real_extract(self, url):
        entity_id = self._match_id(url)
-        entity = self._download_json(
-            'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
-            entity_id, query={'id': entity_id})
+        webpage = self._download_webpage(url, entity_id)
+        entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
        video_id = entity['videoId']
        episode = strip_or_none(entity.get('name'))

        return {
            '_type': 'url_transparent',
+            'url': f'https://videos.tva.ca/details/_{video_id}',
+            'ie_key': TVAIE.ie_key(),
            'id': video_id,
            'title': episode,
-            # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
-            'url': 'https://videos.tva.ca/details/_' + video_id,
-            'description': entity.get('longDescription'),
-            'duration': float_or_none(entity.get('durationMillis'), 1000),
            'episode': episode,
-            'episode_number': int_or_none(entity.get('episodeNumber')),
-            # 'ie_key': 'BrightcoveNew',
-            'ie_key': TVAIE.ie_key(),
+            **traverse_obj(entity, {
+                'description': ('longDescription', {str}),
+                'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}),
+                'channel': ('knownEntities', 'channel', 'name', {str}),
+                'series': ('knownEntities', 'videoShow', 'name', {str}),
+                'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}),
+                'episode_number': ('episodeNumber', {int_or_none}),
+            }),
        }
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@ -451,6 +451,7 @@ def _real_extract(self, url):
            info_page, 'view count', default=None))

        formats = []
+        subtitles = {}
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
@ -462,12 +463,21 @@ def _real_extract(self, url):
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
+                    'ext': 'mp4',
+                    'source_preference': 1,
                    'height': height,
                })
            elif format_id == 'hls':
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id, fatal=False, live=is_live))
+                    m3u8_id=format_id, fatal=False, live=is_live)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            elif format_id.startswith('dash_'):
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    format_url, video_id, mpd_id=format_id, fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
@ -475,7 +485,6 @@ def _real_extract(self, url):
                    'ext': 'flv',
                })

-        subtitles = {}
        for sub in data.get('subs') or {}:
            subtitles.setdefault(sub.get('lang', 'en'), []).append({
                'ext': sub.get('title', '.srt').split('.')[-1],
@ -496,6 +505,7 @@ def _real_extract(self, url):
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
            'subtitles': subtitles,
+            '_format_sort_fields': ('res', 'source'),
        }


--- a/yt_dlp/extractor/wrestleuniverse.py
+++ b/yt_dlp/extractor/wrestleuniverse.py
@ -12,6 +12,7 @@
    jwt_decode_hs256,
    traverse_obj,
    try_call,
+    url_basename,
    url_or_none,
    urlencode_postdata,
    variadic,
@ -147,7 +148,7 @@ def _download_metadata(self, url, video_id, lang, props_keys):
        metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
        if not metadata:
            webpage = self._download_webpage(url, video_id)
-            nextjs_data = self._search_nextjs_data(webpage, video_id)
+            nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
            metadata = traverse_obj(nextjs_data, (
                'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
        return metadata
@ -194,8 +195,7 @@ def _real_extract(self, url):

        return {
            'id': video_id,
-            'formats': self._get_formats(video_data, (
-                (('protocolHls', 'url'), ('chromecastUrls', ...)), {url_or_none}), video_id),
+            'formats': self._get_formats(video_data, ('protocolHls', 'url', {url_or_none}), video_id),
            **traverse_obj(metadata, {
                'title': ('displayName', {str}),
                'description': ('description', {str}),
@ -259,6 +259,10 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
        'params': {
            'skip_download': 'm3u8',
        },
+    }, {
+        'note': 'manifest provides live-a (partial) and live-b (full) streams',
+        'url': 'https://www.wrestle-universe.com/en/lives/umc99R9XsexXrxr9VjTo9g',
+        'only_matching': True,
    }]

    _API_PATH = 'events'
@ -285,12 +289,16 @@ def _real_extract(self, url):

        video_data, decrypt = self._call_encrypted_api(
            video_id, ':watchArchive', 'watch archive', data={'method': 1})
-        info['formats'] = self._get_formats(video_data, (
-            ('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
+        # 'chromecastUrls' can be only partial videos, avoid
+        info['formats'] = self._get_formats(video_data, ('hls', (('urls', ...), 'url'), {url_or_none}), video_id)
        for f in info['formats']:
            # bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
            if f.get('tbr'):
                f['tbr'] = int(f['tbr'] / 2.5)
+            # prefer variants with the same basename as the master playlist to avoid partial streams
+            f['format_id'] = url_basename(f['url']).partition('.')[0]
+            if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
+                f['preference'] = -10

        hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
        if hls_aes_key:
--- a/yt_dlp/extractor/yandexvideo.py
+++ b/yt_dlp/extractor/yandexvideo.py
@ -259,15 +259,15 @@ def _real_extract(self, url):
            webpage = self._download_webpage(redirect, video_id, note='Redirecting')
        data_json = self._search_json(
            r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
-        serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)',
-                                         webpage, 'server state').replace('State', 'Settings')
+        serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
        uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
                                      webpage, 'uploader', default='<a>')
        uploader_name = extract_attributes(uploader).get('aria-label')
-        video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict)
-        stream_urls = try_get(video_json, lambda x: x['video']['streams'])
+        item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
+        video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
+
        formats, subtitles = [], {}
-        for s_url in stream_urls:
+        for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
            ext = determine_ext(s_url)
            if ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
            'id': '16290308',
            'age_limit': 18,
            'categories': [],
-            'description': 'md5:00ea70f642f431c379763c17c2f396bc',
+            'description': str,  # TODO: detect/remove SEO spam description in ytdl backport
            'display_id': 'tinderspecial-trailer1',
            'duration': 298.0,
            'ext': 'mp4',
            'upload_date': '20201123',
            'uploader': 'Ersties',
            'tags': [],
-            'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
-            'timestamp': 1606089600,
+            'thumbnail': r're:https://.+\.jpg',
+            'timestamp': 1606147564,
            'title': 'Tinder In Real Life',
            'view_count': int,
        }
@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
-        definitions = self._download_json(
-            f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+        self._set_cookie('.youporn.com', 'age_verified', '1')
+        webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
+        definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']

-        def get_format_data(data, f):
-            return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+        def get_format_data(data, stream_type):
+            info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
+            if not info_url:
+                return []
+            return traverse_obj(
+                self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
+                lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))

        formats = []
        # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@ -123,10 +129,6 @@ def get_format_data(data, f):
            f['height'] = height
            formats.append(f)

-        webpage = self._download_webpage(
-            'http://www.youporn.com/watch/%s' % video_id, display_id,
-            headers={'Cookie': 'age_verified=1'})
-
        title = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
            webpage, 'title', default=None) or self._og_search_title(
--- a/yt_dlp/networking/_curlcffi.py
+++ b/yt_dlp/networking/_curlcffi.py
@ -132,6 +132,16 @@ def _check_extensions(self, extensions):
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

+    def send(self, request: Request) -> Response:
+        target = self._get_request_target(request)
+        try:
+            response = super().send(request)
+        except HTTPError as e:
+            e.response.extensions['impersonate'] = target
+            raise
+        response.extensions['impersonate'] = target
+        return response
+
    def _send(self, request: Request):
        max_redirects_exceeded = False
        session: curl_cffi.requests.Session = self._get_instance(
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@ -497,6 +497,7 @@ class Response(io.IOBase):
    @param headers: response headers.
    @param status: Response HTTP status code. Default is 200 OK.
    @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
+    @param extensions: Dictionary of handler-specific response extensions.
    """

    def __init__(
@ -505,7 +506,9 @@ def __init__(
            url: str,
            headers: Mapping[str, str],
            status: int = 200,
-            reason: str = None):
+            reason: str = None,
+            extensions: dict = None
+    ):

        self.fp = fp
        self.headers = Message()
@ -517,6 +520,7 @@ def __init__(
            self.reason = reason or HTTPStatus(status).phrase
        except ValueError:
            self.reason = None
+        self.extensions = extensions or {}

    def readable(self):
        return self.fp.readable()
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@ -69,6 +69,10 @@ def _get_variant_and_executable_path():
            # Ref: https://en.wikipedia.org/wiki/Uname#Examples
            if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
                machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
+            # sys.executable returns a /tmp/ path for staticx builds (linux_static)
+            # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
+            if static_exe_path := os.getenv('STATICX_PROG_PATH'):
+                path = static_exe_path
        return f'{remove_end(sys.platform, "32")}{machine}_exe', path

    path = os.path.dirname(__file__)
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -1638,16 +1638,14 @@ def get_filesystem_encoding():
    return encoding if encoding is not None else 'utf-8'


-_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
 _CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
-    # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
-    # While we are only required to escape backslashes immediately before quotes,
-    # we instead escape all of 'em anyways to be consistent
-    '\\': '\\\\',
+    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
@ -1656,19 +1654,14 @@ def get_filesystem_encoding():

 def shell_quote(args, *, shell=False):
    args = list(variadic(args))
-    if any(isinstance(item, bytes) for item in args):
-        deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
-        encoding = get_filesystem_encoding()
-        for index, item in enumerate(args):
-            if isinstance(item, bytes):
-                args[index] = item.decode(encoding)

    if compat_os_name != 'nt':
        return shlex.join(args)

    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
    return ' '.join(
-        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
+        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
+        else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
        for s in args)