Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-11-27 06:10:12 +01:00)

Commit dcc4a1672b: merge 'master'
.github/workflows/build.yml (4 changed lines)

@@ -240,7 +240,7 @@ jobs:
     permissions:
       contents: read
       actions: write  # For cleaning up cache
-    runs-on: macos-12
+    runs-on: macos-13
 
     steps:
       - uses: actions/checkout@v4
@@ -346,7 +346,7 @@ jobs:
   macos_legacy:
     needs: process
     if: inputs.macos_legacy
-    runs-on: macos-12
+    runs-on: macos-13
 
     steps:
       - uses: actions/checkout@v4
.github/workflows/core.yml (2 changed lines)

@@ -59,4 +59,4 @@ jobs:
       continue-on-error: False
       run: |
           python3 -m yt_dlp -v || true  # Print debug head
-          python3 ./devscripts/run_tests.py core
+          python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core
.github/workflows/quick-test.yml (2 changed lines)

@@ -20,7 +20,7 @@ jobs:
       timeout-minutes: 15
       run: |
           python3 -m yt_dlp -v || true
-          python3 ./devscripts/run_tests.py core
+          python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core
   check:
     name: Code check
     if: "!contains(github.event.head_commit.message, 'ci skip all')"
CONTRIBUTORS

@@ -673,3 +673,8 @@ rakslice
 sahilsinghss73
 tony-hn
 xingchensong
+BallzCrasher
+coreywright
+eric321
+poyhen
+tetra-fox
Changelog.md (22 changed lines)

@@ -4,6 +4,28 @@ # Changelog
 # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
 -->
 
+### 2024.10.07
+
+#### Core changes
+- **cookies**: [Fix cookie load error handling](https://github.com/yt-dlp/yt-dlp/commit/e59c82a74cda5139eb3928c75b0bd45484dbe7f0) ([#11140](https://github.com/yt-dlp/yt-dlp/issues/11140)) by [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- **applepodcasts**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6328e2e67a4e126e08af382e6a387073082d5c5f) ([#10903](https://github.com/yt-dlp/yt-dlp/issues/10903)) by [coreywright](https://github.com/coreywright)
+- **cwtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4b7bec66d8100978b82bb24110ed44e2a7749931) ([#11135](https://github.com/yt-dlp/yt-dlp/issues/11135)) by [kclauhk](https://github.com/kclauhk)
+- **instagram**
+    - [Do not hardcode user-agent](https://github.com/yt-dlp/yt-dlp/commit/079a7bc334281d3c13d347770ae5f9f2b7da471a) ([#11155](https://github.com/yt-dlp/yt-dlp/issues/11155)) by [poyhen](https://github.com/poyhen)
+    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cf85cba5d9496bd2689e1070005b4d1b4cd3dc6d) ([#11156](https://github.com/yt-dlp/yt-dlp/issues/11156)) by [tetra-fox](https://github.com/tetra-fox)
+- **noodlemagazine**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ccb23e1bac9768d1c70535beb744e668ed4a2720) ([#11144](https://github.com/yt-dlp/yt-dlp/issues/11144)) by [BallzCrasher](https://github.com/BallzCrasher)
+- **patreon**: [Extract all m3u8 formats for locked posts](https://github.com/yt-dlp/yt-dlp/commit/f91645aceaf13926cf35be2c1dfef61b3aab97fb) ([#11138](https://github.com/yt-dlp/yt-dlp/issues/11138)) by [bashonly](https://github.com/bashonly)
+- **youtube**: [Change default player clients to `ios,mweb`](https://github.com/yt-dlp/yt-dlp/commit/de2062753a188060d76f587e45becce61fe399f9) ([#11190](https://github.com/yt-dlp/yt-dlp/issues/11190)) by [seproDev](https://github.com/seproDev)
+
+#### Postprocessor changes
+- **xattrmetadata**: [Try to write each attribute](https://github.com/yt-dlp/yt-dlp/commit/3a193346eeb27ac2959ff30c370adb899ec94732) ([#11115](https://github.com/yt-dlp/yt-dlp/issues/11115)) by [eric321](https://github.com/eric321)
+
+#### Misc. changes
+- **ci**: [Rerun failed tests](https://github.com/yt-dlp/yt-dlp/commit/b31b81d85f00601710d4fac590c3e4efb4133283) ([#11143](https://github.com/yt-dlp/yt-dlp/issues/11143)) by [Grub4K](https://github.com/Grub4K)
+- **cleanup**: Miscellaneous: [1a176d8](https://github.com/yt-dlp/yt-dlp/commit/1a176d874e6772cd898ce507379ea388e96ee3f7) by [bashonly](https://github.com/bashonly)
+
 ### 2024.09.27
 
 #### Important changes
README.md

@@ -278,7 +278,7 @@ ### Related scripts
 * **`devscripts/update-version.py`** - Update the version number based on the current date.
 * **`devscripts/set-variant.py`** - Set the build variant of the executable.
 * **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
-* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
+* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to something nonempty to forcefully disable lazy extractor loading.
 
 Note: See their `--help` for more info.
 
@@ -1771,7 +1771,7 @@ # EXTRACTOR ARGUMENTS
 #### youtube
 * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
 * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
+* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
 * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
 * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
 * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
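Note on the default-client change above: when embedding yt-dlp, the same selection can be made explicit through the documented `extractor_args` option. A minimal sketch (the watch URL is yt-dlp's usual test video, used here only as a placeholder):

```python
from yt_dlp import YoutubeDL

opts = {
    # Equivalent of --extractor-args "youtube:player_client=ios,mweb"
    'extractor_args': {'youtube': {'player_client': ['ios', 'mweb']}},
}
with YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info['title'])
```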
@@ -1795,6 +1795,7 @@ #### generic
 * `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
 * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
 * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
+* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
 
 #### funimation
 * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
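The `hls_key` arg described above maps onto the same `extractor_args` structure in the embedding API. A hedged sketch reusing the README's own placeholder key and IV; passing these forces the native HLS downloader, as the text notes:

```python
from yt_dlp import YoutubeDL

opts = {
    # Equivalent of --extractor-args "generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321"
    # First value is the key (or key URI), second the optional IV; both are the
    # README's placeholder values, not real credentials
    'extractor_args': {'generic': {'hls_key': ['ABCDEF1234567980', '0xFEDCBA0987654321']}},
}
```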
@@ -1897,6 +1898,7 @@ # PLUGINS
         myplugin.py
 
 yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
+Set the environment variable `YTDLP_NO_PLUGINS` to something nonempty to disable loading plugins entirely.
 
 See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)
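`YTDLP_NO_PLUGINS` follows the same nonempty-string convention; the `os.environ.get('YTDLP_NO_PLUGINS')` check added to `YoutubeDL.py` further down is what reports it. A minimal sketch (the import-order detail is an assumption; set the variable before any yt-dlp code runs):

```python
import os

os.environ['YTDLP_NO_PLUGINS'] = 'true'  # any nonempty value disables plugin loading

import yt_dlp  # imported after setting the variable so discovery can honor it

# With -v / 'verbose': True, the debug header added in YoutubeDL.py below
# should then report: "Plugins are forcibly disabled"
```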
devscripts/changelog_override.json

@@ -190,5 +190,11 @@
         "action": "add",
         "when": "fb8b7f226d251e521a89b23c415e249e5b788e5c",
         "short": "[priority] **The minimum *recommended* Python version has been raised to 3.9**\nSince Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
+    },
+    {
+        "action": "change",
+        "when": "b31b81d85f00601710d4fac590c3e4efb4133283",
+        "short": "[ci] Rerun failed tests (#11143)",
+        "authors": ["Grub4K"]
     }
 ]
devscripts/make_lazy_extractors.py

@@ -2,7 +2,6 @@
 
 # Allow direct execution
 import os
-import shutil
 import sys
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -34,18 +33,14 @@ class {name}({bases}):
 
 
 def main():
+    os.environ['YTDLP_NO_PLUGINS'] = 'true'
+    os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true'
+
     lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
     if os.path.exists(lazy_extractors_filename):
         os.remove(lazy_extractors_filename)
 
-    _ALL_CLASSES = get_all_ies()  # Must be before import
-
-    import yt_dlp.plugins
+    from yt_dlp.extractor.extractors import _ALL_CLASSES
     from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
 
-    # Filter out plugins
-    _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]
-
     DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
     module_src = '\n'.join((
         MODULE_TEMPLATE,
@@ -58,20 +53,6 @@ def main():
     write_file(lazy_extractors_filename, f'{module_src}\n')
 
 
-def get_all_ies():
-    PLUGINS_DIRNAME = 'ytdlp_plugins'
-    BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
-    if os.path.exists(PLUGINS_DIRNAME):
-        # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
-        shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
-    try:
-        from yt_dlp.extractor.extractors import _ALL_CLASSES
-    finally:
-        if os.path.exists(BLOCKED_DIRNAME):
-            shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
-    return _ALL_CLASSES
-
-
 def extra_ie_code(ie, base=None):
     for var in STATIC_CLASS_PROPERTIES:
         val = getattr(ie, var)
devscripts/run_tests.py

@@ -16,7 +16,7 @@
 def parse_args():
     parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
     parser.add_argument(
-        'test', help='a extractor tests, or one of "core" or "download"', nargs='*')
+        'test', help='an extractor test, test path, or one of "core" or "download"', nargs='*')
     parser.add_argument(
         '-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
     parser.add_argument(
@@ -27,7 +27,6 @@ def parse_args():
 def run_tests(*tests, pattern=None, ci=False):
     run_core = 'core' in tests or (not pattern and not tests)
     run_download = 'download' in tests
-    tests = list(map(fix_test_name, tests))
 
     pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
     arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
@@ -41,7 +40,9 @@ def run_tests(*tests, pattern=None, ci=False):
         arguments.extend(['-m', 'download'])
     else:
         arguments.extend(
-            f'test/test_download.py::TestDownload::test_{test}' for test in tests)
+            test if '/' in test
+            else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}'
+            for test in tests)
 
     print(f'Running {arguments}', flush=True)
     try:
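A standalone sketch of the selection rule introduced above: arguments containing a `/` are passed through as pytest targets, anything else is mapped to a `TestDownload` method. The `fix_test_name` stand-in here is illustrative only; the real helper lives in `devscripts/run_tests.py`:

```python
import re


def fix_test_name(test):
    # Stand-in with assumed behavior: normalize a bare test name
    return re.sub(r'^test_', '', test)


def select_targets(tests):
    """Map CLI test arguments to pytest targets, mirroring the diff above."""
    return [
        test if '/' in test  # a path such as test/test_utils.py passes through
        else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}'
        for test in tests
    ]


print(select_targets(['Youtube', 'test/test_utils.py']))
# ['test/test_download.py::TestDownload::test_Youtube', 'test/test_utils.py']
```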
pyproject.toml

@@ -80,6 +80,7 @@ static-analysis = [
 ]
 test = [
     "pytest~=8.1",
+    "pytest-rerunfailures~=14.0",
 ]
 pyinstaller = [
     "pyinstaller>=6.10.0",  # Windows temp cleanup fixed in 6.10.0
@@ -162,7 +163,6 @@ lint-fix = "ruff check --fix {args:.}"
 features = ["test"]
 dependencies = [
     "pytest-randomly~=3.15",
-    "pytest-rerunfailures~=14.0",
     "pytest-xdist[psutil]~=3.5",
 ]
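The `--reruns`/`--reruns-delay` flags used by the CI workflows above come from `pytest-rerunfailures`, now part of the `test` dependency group. The same behavior can also be requested per test with that plugin's documented `flaky` marker; a minimal sketch:

```python
import random

import pytest


@pytest.mark.flaky(reruns=2, reruns_delay=3.0)  # retry up to twice, 3s apart
def test_sometimes_fails():
    # A deliberately flaky assertion to demonstrate the rerun behavior
    assert random.random() < 0.5
```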
test/test_traversal.py

@@ -4,8 +4,18 @@
 
 import pytest
 
-from yt_dlp.utils import dict_get, int_or_none, str_or_none
-from yt_dlp.utils.traversal import traverse_obj
+from yt_dlp.utils import (
+    ExtractorError,
+    determine_ext,
+    dict_get,
+    int_or_none,
+    str_or_none,
+)
+from yt_dlp.utils.traversal import (
+    traverse_obj,
+    require,
+    subs_list_to_dict,
+)
 
 _TEST_DATA = {
     100: 100,
@@ -420,6 +430,71 @@ def test_traversal_morsel(self):
         assert traverse_obj(morsel, [(None,), any]) == morsel, \
             'Morsel should not be implicitly changed to dict on usage'
 
+    def test_traversal_filter(self):
+        data = [None, False, True, 0, 1, 0.0, 1.1, '', 'str', {}, {0: 0}, [], [1]]
+
+        assert traverse_obj(data, [..., filter]) == [True, 1, 1.1, 'str', {0: 0}, [1]], \
+            '`filter` should filter falsy values'
+
+
+class TestTraversalHelpers:
+    def test_traversal_require(self):
+        with pytest.raises(ExtractorError):
+            traverse_obj(_TEST_DATA, ['None', {require('value')}])
+        assert traverse_obj(_TEST_DATA, ['str', {require('value')}]) == 'str', \
+            '`require` should pass through non `None` values'
+
+    def test_subs_list_to_dict(self):
+        assert traverse_obj([
+            {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
+            {'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
+            {'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
+        ], [..., {
+            'id': 'name',
+            'url': 'url',
+        }, all, {subs_list_to_dict}]) == {
+            'de': [{'url': 'https://example.com/subs/de.vtt'}],
+            'en': [
+                {'url': 'https://example.com/subs/en1.ass'},
+                {'url': 'https://example.com/subs/en2.ass'},
+            ],
+        }, 'function should build subtitle dict from list of subtitles'
+        assert traverse_obj([
+            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
+            {'name': 'de'},
+            {'name': 'en', 'content': 'content'},
+            {'url': 'https://example.com/subs/en'},
+        ], [..., {
+            'id': 'name',
+            'data': 'content',
+            'url': 'url',
+        }, all, {subs_list_to_dict}]) == {
+            'de': [{'url': 'https://example.com/subs/de.ass'}],
+            'en': [{'data': 'content'}],
+        }, 'subs with mandatory items missing should be filtered'
+        assert traverse_obj([
+            {'url': 'https://example.com/subs/de.ass', 'name': 'de'},
+            {'url': 'https://example.com/subs/en', 'name': 'en'},
+        ], [..., {
+            'id': 'name',
+            'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
+            'url': 'url',
+        }, all, {subs_list_to_dict(ext='ext')}]) == {
+            'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
+            'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
+        }, '`ext` should set default ext but leave existing value untouched'
+        assert traverse_obj([
+            {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
+            {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
+        ], [..., {
+            'id': 'name',
+            'quality': ['prio', {int}],
+            'url': 'url',
+        }, all, {subs_list_to_dict(ext='ext')}]) == {'en': [
+            {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
+            {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
+        ]}, '`quality` key should sort subtitle list accordingly'
+
 
 class TestDictGet:
     def test_dict_get(self):
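For illustration, the new `filter` traversal key exercised by `test_traversal_filter` above can be used directly on a yt-dlp build that includes this change; a small standalone sketch:

```python
from yt_dlp.utils.traversal import traverse_obj

data = {'formats': [None, '', {'url': 'https://example.com/1.mp4'}, 0]}
# `...` branches over every list item; the built-in `filter` then drops
# falsy results, exactly as the test above asserts
print(traverse_obj(data, ('formats', ..., filter)))
# [{'url': 'https://example.com/1.mp4'}]
```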
test/test_utils.py

@@ -221,9 +221,10 @@ def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
 
     def test_sanitize_path(self):
-        if sys.platform != 'win32':
-            return
+        with unittest.mock.patch('sys.platform', 'win32'):
+            self._test_sanitize_path()
 
+    def _test_sanitize_path(self):
         self.assertEqual(sanitize_path('abc'), 'abc')
         self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
         self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
@@ -256,6 +257,11 @@ def test_sanitize_path(self):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+        self.assertEqual(sanitize_path('\\abc'), '\\abc')
+        self.assertEqual(sanitize_path('C:abc'), 'C:abc')
+        self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..')
+        self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')
+
     def test_sanitize_url(self):
         self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
         self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
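The `unittest.mock.patch` trick above lets the Windows-only assertions run on any host, presuming `sanitize_path` consults `sys.platform` at call time; the same pattern in isolation:

```python
import sys
import unittest.mock


def describe_platform():
    # Stand-in for code that branches on the platform at call time,
    # as the patched test expects sanitize_path() to do
    return 'windows rules' if sys.platform == 'win32' else 'posix rules'


with unittest.mock.patch('sys.platform', 'win32'):
    assert describe_platform() == 'windows rules'  # passes on any host OS
```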
yt_dlp/YoutubeDL.py

@@ -27,7 +27,7 @@
 from .cache import Cache
 from .compat import urllib  # isort: split
 from .compat import compat_os_name, urllib_req_to_req
-from .cookies import LenientSimpleCookie, load_cookies
+from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
 from .downloader.rtmp import rtmpdump_version
 from .extractor import gen_extractor_classes, get_info_extractor
@@ -1624,7 +1624,7 @@ def wrapper(self, *args, **kwargs):
         while True:
             try:
                 return func(self, *args, **kwargs)
-            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
+            except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                 raise
             except ReExtractInfo as e:
                 if e.expected:
@@ -3580,6 +3580,8 @@ def __download_wrapper(self, func):
         def wrapper(*args, **kwargs):
             try:
                 res = func(*args, **kwargs)
+            except CookieLoadError:
+                raise
             except UnavailableVideoError as e:
                 self.report_error(e)
             except DownloadCancelled as e:
@@ -4068,6 +4070,10 @@ def get_encoding(stream):
 
         write_debug(f'Proxy map: {self.proxies}')
         write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
+        if os.environ.get('YTDLP_NO_PLUGINS'):
+            write_debug('Plugins are forcibly disabled')
+            return
+
         for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
             display_list = ['{}{}'.format(
                 klass.__name__, '' if klass.__name__ == name else f' as {name}')
@@ -4113,8 +4119,14 @@ def proxies(self):
     @functools.cached_property
     def cookiejar(self):
         """Global cookiejar instance"""
-        return load_cookies(
-            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
+        try:
+            return load_cookies(
+                self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
+        except CookieLoadError as error:
+            cause = error.__context__
+            # compat: <=py3.9: `traceback.format_exception` has a different signature
+            self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)))
+            raise
 
     @property
     def _opener(self):
yt_dlp/__init__.py

@@ -15,7 +15,7 @@
 import traceback
 
 from .compat import compat_os_name
-from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
 from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
@@ -1084,7 +1084,7 @@ def main(argv=None):
     _IN_CLI = True
     try:
         _exit(*variadic(_real_main(argv)))
-    except DownloadError:
+    except (CookieLoadError, DownloadError):
         _exit(1)
     except SameFileError as e:
         _exit(f'ERROR: {e}')
yt_dlp/cookies.py

@@ -34,6 +34,7 @@
 from .minicurses import MultilinePrinter, QuietMultilinePrinter
 from .utils import (
     DownloadError,
+    YoutubeDLError,
     Popen,
     error_to_str,
     expand_path,
@@ -86,7 +87,12 @@ def _create_progress_bar(logger):
     return printer
 
 
+class CookieLoadError(YoutubeDLError):
+    pass
+
+
 def load_cookies(cookie_file, browser_specification, ydl):
-    cookie_jars = []
-    if browser_specification is not None:
-        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
+    try:
+        cookie_jars = []
+        if browser_specification is not None:
+            browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
@@ -104,6 +110,8 @@ def load_cookies(cookie_file, browser_specification, ydl):
-        cookie_jars.append(jar)
+            cookie_jars.append(jar)
 
-    return _merge_cookie_jars(cookie_jars)
+        return _merge_cookie_jars(cookie_jars)
+    except Exception:
+        raise CookieLoadError('failed to load cookies')
 
 
 def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
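With `CookieLoadError` in place, embedding code can treat cookie failures explicitly. A hedged sketch mirroring the CLI handling from the `__init__.py` hunk above (the cookie file path is illustrative):

```python
from yt_dlp import YoutubeDL
from yt_dlp.cookies import CookieLoadError
from yt_dlp.utils import DownloadError

try:
    with YoutubeDL({'cookiefile': '/tmp/cookies.txt'}) as ydl:  # path is a placeholder
        ydl.cookiejar  # cached property; touching it triggers the actual load
except (CookieLoadError, DownloadError):
    # same pairing the CLI's main() now catches
    print('cookies could not be loaded')
```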
yt_dlp/extractor/adobepass.py

@@ -1355,6 +1355,7 @@
 class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
     _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
+    _MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
     _MVPD_CACHE = 'ap-mvpd'
 
     _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
@@ -1454,7 +1455,11 @@ def extract_redirect_url(html, url=None, fatal=False):
                         'no_iframe': 'false',
                         'domain_name': 'adobe.com',
                         'redirect_url': url,
-                    })
+                    }, headers={
+                        # yt-dlp's default user-agent is usually too old for Comcast_SSO
+                        # See: https://github.com/yt-dlp/yt-dlp/issues/10848
+                        'User-Agent': self._MODERN_USER_AGENT,
+                    } if mso_id == 'Comcast_SSO' else None)
                 elif not self._cookies_passed:
                     raise_mvpd_required()
yt_dlp/extractor/applepodcasts.py

@@ -1,27 +1,42 @@
 from .common import InfoExtractor
 from ..utils import (
-    clean_html,
     clean_podcast_url,
-    get_element_by_class,
     int_or_none,
     parse_iso8601,
-    try_get,
 )
+from ..utils.traversal import traverse_obj
 
 
 class ApplePodcastsIE(InfoExtractor):
     _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
     _TESTS = [{
+        'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
+        'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
+        'info_dict': {
+            'id': '1000665010654',
+            'ext': 'mp3',
+            'title': 'Ferreck Dawn - To The Break of Dawn 117',
+            'episode': 'Ferreck Dawn - To The Break of Dawn 117',
+            'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
+            'upload_date': '20240812',
+            'timestamp': 1723449600,
+            'duration': 3596,
+            'series': 'Ferreck Dawn - To The Break of Dawn',
+            'thumbnail': 're:.+[.](png|jpe?g|webp)',
+        },
+    }, {
         'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
-        'md5': '41dc31cd650143e530d9423b6b5a344f',
+        'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
         'info_dict': {
             'id': '1000482637777',
             'ext': 'mp3',
             'title': '207 - Whitney Webb Returns',
             'episode': '207 - Whitney Webb Returns',
+            'episode_number': 207,
             'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
             'upload_date': '20200705',
             'timestamp': 1593932400,
-            'duration': 6454,
+            'duration': 5369,
             'series': 'The Tim Dillon Show',
             'thumbnail': 're:.+[.](png|jpe?g|webp)',
         },
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
     def _real_extract(self, url):
         episode_id = self._match_id(url)
         webpage = self._download_webpage(url, episode_id)
-        episode_data = {}
-        ember_data = {}
-        # new page type 2021-11
-        amp_data = self._parse_json(self._search_regex(
-            r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
-            webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
-        amp_data = try_get(amp_data,
-                           lambda a: self._parse_json(
-                               next(a[x] for x in iter(a) if episode_id in x),
-                               episode_id),
-                           dict) or {}
-        amp_data = amp_data.get('d') or []
-        episode_data = try_get(
-            amp_data,
-            lambda a: next(x for x in a
-                           if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
-            dict)
-        if not episode_data:
-            # try pre 2021-11 page type: TODO: consider deleting if no longer used
-            ember_data = self._parse_json(self._search_regex(
-                r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
-                webpage, 'ember data'), episode_id) or {}
-            ember_data = ember_data.get(episode_id) or ember_data
-            episode_data = try_get(ember_data, lambda x: x['data'], dict)
-        episode = episode_data['attributes']
-        description = episode.get('description') or {}
-
-        series = None
-        for inc in (amp_data or ember_data.get('included') or []):
-            if inc.get('type') == 'media/podcast':
-                series = try_get(inc, lambda x: x['attributes']['name'])
-        series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
+        server_data = self._search_json(
+            r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
+            'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
+        model_data = traverse_obj(server_data, (
+            'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
+            'model', {dict}, any))
 
         return {
             'id': episode_id,
-            'title': episode.get('name'),
-            'url': clean_podcast_url(episode['assetUrl']),
-            'description': description.get('standard') or description.get('short'),
-            'timestamp': parse_iso8601(episode.get('releaseDateTime')),
-            'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
-            'series': series,
+            **self._json_ld(
+                traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
+                or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
+            **traverse_obj(model_data, {
+                'title': ('title', {str}),
+                'url': ('streamUrl', {clean_podcast_url}),
+                'timestamp': ('releaseDate', {parse_iso8601}),
+                'duration': ('duration', {int_or_none}),
+            }),
             'thumbnail': self._og_search_thumbnail(webpage),
            'vcodec': 'none',
         }
yt_dlp/extractor/common.py

@@ -573,13 +573,13 @@ class InfoExtractor:
 
     def _login_hint(self, method=NO_DEFAULT, netrc=None):
         password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
+        cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
         return {
             None: '',
-            'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
+            'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
             'password': f'Use {password_hint}',
-            'cookies': (
-                'Use --cookies-from-browser or --cookies for the authentication. '
-                'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
+            'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
+            'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
         }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
 
     def __init__(self, downloader=None):
@@ -1710,7 +1710,7 @@ def traverse_json_ld(json_ld, at_top_level=True):
             rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
             if rating is not None:
                 info['average_rating'] = rating
-            if is_type(e, 'TVEpisode', 'Episode'):
+            if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
                 episode_name = unescapeHTML(e.get('name'))
                 info.update({
                     'episode': episode_name,
yt_dlp/extractor/cwtv.py

@@ -6,12 +6,37 @@
     parse_iso8601,
     smuggle_url,
     str_or_none,
+    update_url_query,
 )
 
 
 class CWTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
     _TESTS = [{
+        'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
+        'info_dict': {
+            'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
+            'ext': 'mp4',
+            'title': 'Ready Or Not',
+            'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
+            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'duration': 2547,
+            'timestamp': 1720519200,
+            'uploader': 'CWTV',
+            'chapters': 'count:6',
+            'series': 'All American: Homecoming',
+            'season_number': 3,
+            'episode_number': 1,
+            'age_limit': 0,
+            'upload_date': '20240709',
+            'season': 'Season 3',
+            'episode': 'Episode 1',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
         'info_dict': {
             'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
@@ -69,13 +94,14 @@ class CWTVIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         data = self._download_json(
-            'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
-            video_id)
+            f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
         if data.get('result') != 'ok':
             raise ExtractorError(data['msg'], expected=True)
         video_data = data['video']
         title = video_data['title']
-        mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
+        mpx_url = update_url_query(
+            video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
+            {'formats': 'M3U+none'})
 
         season = str_or_none(video_data.get('season'))
         episode = str_or_none(video_data.get('episode'))
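The CWTV change above swaps string concatenation for `yt_dlp.utils.update_url_query`, which also percent-encodes the query value; a standalone sketch (the guid is a placeholder):

```python
from yt_dlp.utils import update_url_query

url = update_url_query(
    'https://link.theplatform.com/s/cwtv/media/guid/2703454149/some-guid',
    {'formats': 'M3U+none'})
print(url)  # ...?formats=M3U%2Bnone - note the '+' gets percent-encoded for us
```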
yt_dlp/extractor/drtv.py

@@ -139,12 +139,11 @@ def _real_initialize(self):
             return
 
         token_response = self._download_json(
-            'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
+            'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
             note='Downloading anonymous token', headers={
                 'content-type': 'application/json',
             }, query={
-                'device': 'web_browser',
-                'ff': 'idp,ldp,rpt',
+                'device': 'phone_android',
                 'lang': 'da',
                 'supportFallbackToken': 'true',
             }, data=json.dumps({
yt_dlp/extractor/generic.py

@@ -8,6 +8,7 @@
 from .commonprotocols import RtmpIE
 from .youtube import YoutubeIE
 from ..compat import compat_etree_fromstring
+from ..networking.impersonate import ImpersonateTarget
 from ..utils import (
     KNOWN_EXTENSIONS,
     MEDIA_EXTENSIONS,
@@ -2373,6 +2374,12 @@ def _real_extract(self, url):
         else:
             video_id = self._generic_id(url)
 
+        # Try to impersonate a web-browser by default if possible
+        # Skip impersonation if not available to omit the warning
+        impersonate = self._configuration_arg('impersonate', [''])
+        if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
+            impersonate = None
+
         # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
         # making it impossible to download only chunk of the file (yet we need only 512kB to
         # test whether it's HTML or not). According to yt-dlp default Accept-Encoding
@@ -2384,7 +2391,7 @@ def _real_extract(self, url):
         full_response = self._request_webpage(url, video_id, headers=filter_dict({
             'Accept-Encoding': 'identity',
             'Referer': smuggled_data.get('referer'),
-        }))
+        }), impersonate=impersonate)
         new_url = full_response.url
         if new_url != extract_basic_auth(url)[0]:
             self.report_following_redirect(new_url)
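The impersonation toggle above is driven by the `generic:impersonate` extractor-arg documented in the README hunk earlier; through the embedding API it looks like this (placeholder URL, sketch only):

```python
from yt_dlp import YoutubeDL

opts = {
    # Equivalent of --extractor-args "generic:impersonate=safari,chrome-110";
    # pass 'false' instead to disable impersonation entirely
    'extractor_args': {'generic': {'impersonate': ['safari', 'chrome-110']}},
}
with YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://example.com/page-with-video', download=False)
```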
yt_dlp/extractor/instagram.py

@@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
         'X-IG-WWW-Claim': '0',
         'Origin': 'https://www.instagram.com',
         'Accept': '*/*',
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
     }
 
     def _perform_login(self, username, password):
@@ -435,10 +434,10 @@ def _real_extract(self, url):
                 'X-Requested-With': 'XMLHttpRequest',
                 'Referer': url,
             }, query={
-                'query_hash': '9f8827793ef34641b2fb195d4d41151c',
+                'doc_id': '8845758582119845',
                 'variables': json.dumps(variables, separators=(',', ':')),
             })
-        media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
+        media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
 
         if not general_info:
             self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
yt_dlp/extractor/noodlemagazine.py

@@ -43,14 +43,8 @@ def _real_extract(self, url):
         def build_url(url_or_path):
             return urljoin('https://adult.noodlemagazine.com', url_or_path)
 
-        headers = {'Referer': url}
-        player_path = self._html_search_regex(
-            r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
-        player_iframe = self._download_webpage(
-            build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
-        playlist_url = self._search_regex(
-            r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
-        playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)
+        playlist_info = self._search_json(
+            r'window\.playlist\s*=', webpage, 'playlist info', video_id)
 
         formats = []
         for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
yt_dlp/extractor/patreon.py

@@ -1,3 +1,4 @@
+import functools
 import itertools
 import urllib.parse
 
@@ -22,13 +23,19 @@
 
 
 class PatreonBaseIE(InfoExtractor):
-    USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
+    @functools.cached_property
+    def patreon_user_agent(self):
+        # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
+        # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
+        if self._get_cookies('https://www.patreon.com/').get('session_id'):
+            return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
+        return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
 
     def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
         if headers is None:
             headers = {}
         if 'User-Agent' not in headers:
-            headers['User-Agent'] = self.USER_AGENT
+            headers['User-Agent'] = self.patreon_user_agent
         if query:
             query.update({'json-api-version': 1.0})
 
@@ -48,6 +55,7 @@ def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
 
 
 class PatreonIE(PatreonBaseIE):
+    IE_NAME = 'patreon'
     _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.patreon.com/creation?hid=743933',
@@ -111,6 +119,7 @@ class PatreonIE(PatreonBaseIE):
             'comment_count': int,
             'channel_is_verified': True,
+            'chapters': 'count:4',
             'timestamp': 1423689666,
         },
         'params': {
             'noplaylist': True,
@@ -221,6 +230,7 @@ class PatreonIE(PatreonBaseIE):
             'thumbnail': r're:^https?://.+',
         },
         'params': {'skip_download': 'm3u8'},
+        'expected_warnings': ['Failed to parse XML: not well-formed'],
     }, {
         # multiple attachments/embeds
        'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
@@ -326,8 +336,13 @@ def _real_extract(self, url):
         if embed_url and (urlh := self._request_webpage(
                 embed_url, video_id, 'Checking embed URL', headers=headers,
                 fatal=False, errnote=False, expected_status=403)):
+            # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
+            # to check for "Sorry, we couldn&rsquo;t find that page" in the meta description tag
+            meta_description = clean_html(self._html_search_meta(
+                'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
             # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
-            if urlh.status != 403 or VidsIoIE.suitable(embed_url):
+            if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page')
+                    or VidsIoIE.suitable(embed_url)):
                 entries.append(self.url_result(smuggle_url(embed_url, headers)))
 
         post_file = traverse_obj(attributes, ('post_file', {dict}))
@@ -419,15 +434,19 @@ def _get_comments(self, post_id):
 
 
 class PatreonCampaignIE(PatreonBaseIE):
-
-    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m|api/campaigns)/(?P<campaign_id>\d+)|(?P<vanity>[-\w]+))'
     IE_NAME = 'patreon:campaign'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?patreon\.com/(?:
+            (?:m|api/campaigns)/(?P<campaign_id>\d+)|
+            (?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
+        )(?:/posts)?/?(?:$|[?#])'''
     _TESTS = [{
         'url': 'https://www.patreon.com/dissonancepod/',
         'info_dict': {
             'title': 'Cognitive Dissonance Podcast',
             'channel_url': 'https://www.patreon.com/dissonancepod',
             'id': '80642',
-            'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
+            'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*',
             'channel_id': '80642',
             'channel': 'Cognitive Dissonance Podcast',
             'age_limit': 0,
@@ -445,7 +464,7 @@ class PatreonCampaignIE(PatreonBaseIE):
             'id': '4767637',
             'channel_id': '4767637',
             'channel_url': 'https://www.patreon.com/notjustbikes',
-            'description': 'md5:9f4b70051216c4d5c58afe580ffc8d0f',
+            'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*',
             'age_limit': 0,
             'channel': 'Not Just Bikes',
             'uploader_url': 'https://www.patreon.com/notjustbikes',
@@ -462,7 +481,7 @@ class PatreonCampaignIE(PatreonBaseIE):
             'id': '4243769',
             'channel_id': '4243769',
             'channel_url': 'https://www.patreon.com/secondthought',
-            'description': 'md5:69c89a3aba43efdb76e85eb023e8de8b',
+            'description': r're:(?s).*Second Thought is an educational YouTube channel.*',
             'age_limit': 0,
             'channel': 'Second Thought',
             'uploader_url': 'https://www.patreon.com/secondthought',
@@ -482,10 +501,6 @@ class PatreonCampaignIE(PatreonBaseIE):
         'only_matching': True,
     }]
 
-    @classmethod
-    def suitable(cls, url):
-        return False if PatreonIE.suitable(url) else super().suitable(url)
-
     def _entries(self, campaign_id):
         cursor = None
         params = {
@@ -512,7 +527,7 @@ def _real_extract(self, url):
 
         campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
         if campaign_id is None:
-            webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
+            webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
             campaign_id = self._search_nextjs_data(
                 webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
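The `patreon_user_agent` property above relies on `functools.cached_property`, so the session-cookie check runs once per extractor instance rather than per request; a standalone illustration of that behavior:

```python
import functools


class ApiClient:
    lookups = 0

    @functools.cached_property
    def user_agent(self):
        # Evaluated only on first access; later accesses reuse the cached value
        ApiClient.lookups += 1
        return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'


client = ApiClient()
assert client.user_agent == client.user_agent
assert ApiClient.lookups == 1
```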
yt_dlp/extractor/reddit.py

@@ -1,3 +1,4 @@
+import json
 import urllib.parse
 
 from .common import InfoExtractor
@@ -17,7 +18,7 @@
 
 class RedditIE(InfoExtractor):
     _NETRC_MACHINE = 'reddit'
-    _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'https?://(?:\w+\.)?reddit(?:media)?\.com/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
         'info_dict': {
@@ -251,15 +252,15 @@ def _get_subtitles(self, video_id):
         return {'en': [{'url': caption_url}]}
 
     def _real_extract(self, url):
-        host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
+        slug, video_id = self._match_valid_url(url).group('slug', 'id')
 
-        data = self._download_json(
-            f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
-        if not data:
-            fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
-            self.to_screen(f'{host} request failed, retrying with {fallback_host}')
+        try:
             data = self._download_json(
-                f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
+                f'https://www.reddit.com/{slug}/.json', video_id, expected_status=403)
+        except ExtractorError as e:
+            if isinstance(e.cause, json.JSONDecodeError):
+                self.raise_login_required('Account authentication is required')
+            raise
 
         if traverse_obj(data, 'error') == 403:
             reason = data.get('reason')
yt_dlp/extractor/tver.py

@@ -6,11 +6,12 @@
     str_or_none,
     strip_or_none,
     traverse_obj,
+    update_url_query,
 )
 
 
 class TVerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
     _TESTS = [{
         'skip': 'videos are only available for 7 days',
         'url': 'https://tver.jp/episodes/ep83nf3w4p',
@@ -21,80 +22,115 @@ class TVerIE(InfoExtractor):
             'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
             'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
             'channel': 'テレビ朝日',
             'id': 'ep83nf3w4p',
             'ext': 'mp4',
             'onair_label': '5月3日(火)放送分',
             'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! テレビ朝日 5月3日(火)放送分',
         },
         'add_ie': ['BrightcoveNew'],
     }, {
-        'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
-        'info_dict': {
-            'id': '6359578055112',
-            'ext': 'mp4',
-            'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
-            'timestamp': 1722279928,
-            'upload_date': '20240729',
-            'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
-            'uploader_id': '4774017240001',
-            'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
-            'duration': 670.571,
-        },
-        'params': {'skip_download': 'm3u8'},
-    }, {
         'url': 'https://tver.jp/corner/f0103888',
         'only_matching': True,
     }, {
         'url': 'https://tver.jp/lp/f0033031',
         'only_matching': True,
     }, {
+        'url': 'https://tver.jp/series/srtxft431v',
+        'info_dict': {
+            'id': 'srtxft431v',
+            'title': '名探偵コナン',
+        },
+        'playlist': [
+            {
+                'md5': '779ffd97493ed59b0a6277ea726b389e',
+                'info_dict': {
+                    'id': 'ref:conan-1137-241005',
+                    'ext': 'mp4',
+                    'title': '名探偵コナン #1137「行列店、味変の秘密」',
+                    'uploader_id': '5330942432001',
+                    'tags': [],
+                    'channel': '読売テレビ',
+                    'series': '名探偵コナン',
+                    'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
+                    'episode': '#1137「行列店、味変の秘密」',
+                    'duration': 1469.077,
+                    'timestamp': 1728030405,
+                    'upload_date': '20241004',
+                    'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
+                    'thumbnail': r're:https://.+\.jpg',
+                },
+            }],
+    }, {
+        'url': 'https://tver.jp/series/sru35hwdd2',
+        'info_dict': {
+            'id': 'sru35hwdd2',
+            'title': '神回だけ見せます!',
+        },
+        'playlist_count': 11,
+    }, {
         'url': 'https://tver.jp/series/srkq2shp9d',
         'only_matching': True,
     }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
-    _PLATFORM_UID = None
-    _PLATFORM_TOKEN = None
+    _HEADERS = {'x-tver-platform-type': 'web'}
+    _PLATFORM_QUERY = {}
 
     def _real_initialize(self):
-        create_response = self._download_json(
-            'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None,
-            note='Creating session', data=b'device_type=pc', headers={
-                'Origin': 'https://s.tver.jp',
-                'Referer': 'https://s.tver.jp/',
-                'Content-Type': 'application/x-www-form-urlencoded',
-            })
-        self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid'))
-        self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))
+        session_info = self._download_json(
+            'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
+            None, 'Creating session', data=b'device_type=pc')
+        self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
+            'platform_uid': 'platform_uid',
+            'platform_token': 'platform_token',
+        }))
+
+    def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
+        return self._download_json(
+            f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
+            fatal=fatal, headers=self._HEADERS, query={
+                **self._PLATFORM_QUERY,
+                **(query or {}),
+            })
+
+    def _yield_episode_ids_for_series(self, series_id):
+        seasons_info = self._download_json(
+            f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
+            series_id, 'Downloading seasons info', headers=self._HEADERS)
+        for season_id in traverse_obj(
+                seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
+            episodes_info = self._call_platform_api(
+                f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
+            yield from traverse_obj(episodes_info, (
+                'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
 
     def _real_extract(self, url):
         video_id, video_type = self._match_valid_url(url).group('id', 'type')
 
-        if video_type == 'olympic/paris2024/video':
-            # Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
-            # https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
-            return self.url_result(smuggle_url(
-                self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
-                {'geo_countries': ['JP']}), 'BrightcoveNew')
-        elif video_type not in {'series', 'episodes'}:
+        if video_type == 'series':
+            series_info = self._call_platform_api(
+                f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
+            return self.playlist_from_matches(
+                self._yield_episode_ids_for_series(video_id), video_id,
+                traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
+                ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
+
+        if video_type != 'episodes':
             webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
             video_id = self._match_id(self._search_regex(
                 (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
                 webpage, 'url regex'))
 
-        episode_info = self._download_json(
-            f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
-            video_id, fatal=False,
-            query={
-                'platform_uid': self._PLATFORM_UID,
-                'platform_token': self._PLATFORM_TOKEN,
-            }, headers={
-                'x-tver-platform-type': 'web',
-            })
+        episode_info = self._call_platform_api(
            f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
+                'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
+            })
         episode_content = traverse_obj(
             episode_info, ('result', 'episode', 'content')) or {}
 
+        version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
         video_info = self._download_json(
-            f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
-            query={
-                'v': str_or_none(episode_content.get('version')) or '5',
-            }, headers={
-                'Origin': 'https://tver.jp',
-                'Referer': 'https://tver.jp/',
-            })
+            f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
+            query={'v': version}, headers={'Referer': 'https://tver.jp/'})
         p_id = video_info['video']['accountID']
         r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
         if not r_id:
@@ -110,6 +146,23 @@ def _real_extract(self, url):
         provider = str_or_none(episode_content.get('productionProviderName'))
         onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
 
+        thumbnails = [
+            {
+                'id': quality,
+                'url': update_url_query(
+                    f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
+                    {'v': version}),
+                'width': width,
+                'height': height,
+            }
+            for quality, width, height in [
+                ('small', 480, 270),
+                ('medium', 640, 360),
+                ('large', 960, 540),
+                ('xlarge', 1280, 720),
+            ]
+        ]
+
         return {
             '_type': 'url_transparent',
             'title': title,
@@ -119,6 +172,7 @@ def _real_extract(self, url):
             'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
             'channel': provider,
             'description': str_or_none(video_info.get('description')),
+            'thumbnails': thumbnails,
             'url': smuggle_url(
                 self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
             'ie_key': 'BrightcoveNew',
@ -27,8 +27,9 @@
|
|||
|
||||
class WeverseBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'weverse'
|
||||
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api/v2'
|
||||
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api'
|
||||
_API_HEADERS = {
|
||||
'Accept': 'application/json',
|
||||
'Referer': 'https://weverse.io/',
|
||||
'WEV-device-Id': str(uuid.uuid4()),
|
||||
}
|
||||
|
@ -39,14 +40,14 @@ def _perform_login(self, username, password):
|
|||
|
||||
headers = {
|
||||
'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
|
||||
'x-acc-app-version': '2.2.6',
|
||||
'x-acc-app-version': '3.3.6',
|
||||
'x-acc-language': 'en',
|
||||
'x-acc-service-id': 'weverse',
|
||||
'x-acc-trace-id': str(uuid.uuid4()),
|
||||
'x-clog-user-device-id': str(uuid.uuid4()),
|
||||
}
|
||||
valid_username = traverse_obj(self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username',
|
||||
f'{self._ACCOUNT_API_BASE}/v2/signup/email/status', None, note='Checking username',
|
||||
query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
|
||||
if not valid_username:
|
||||
raise ExtractorError('Invalid username provided', expected=True)
|
||||
|
@ -54,8 +55,9 @@ def _perform_login(self, username, password):
|
|||
headers['content-type'] = 'application/json'
|
||||
try:
|
||||
auth = self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/auth/token/by-credentials', None, data=json.dumps({
|
||||
f'{self._ACCOUNT_API_BASE}/v3/auth/token/by-credentials', None, data=json.dumps({
|
||||
'email': username,
|
||||
'otpSessionId': 'BY_PASS',
|
||||
'password': password,
|
||||
}, separators=(',', ':')).encode(), headers=headers, note='Logging in')
|
||||
except ExtractorError as e:
|
||||
|
@ -78,8 +80,10 @@ def _call_api(self, ep, video_id, data=None, note='Downloading API JSON'):
|
|||
# From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
|
||||
key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
|
||||
api_path = update_url_query(ep, {
|
||||
# 'gcc': 'US',
|
||||
'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
|
||||
'language': 'en',
|
||||
'os': 'WEB',
|
||||
'platform': 'WEB',
|
||||
'wpf': 'pc',
|
||||
})
|
||||
|
@@ -152,7 +156,7 @@ def _parse_post_meta(self, metadata):
             'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}),
             'uploader': ('author', 'profileName', {str}),
             'uploader_id': ('author', 'memberId', {str}),
-            'creator': ('community', 'communityName', {str}),
+            'creators': ('community', 'communityName', {str}, all),
             'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
             'duration': ('extension', 'video', 'playTime', {float_or_none}),
             'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),

@@ -196,7 +200,7 @@ class WeverseIE(WeverseBaseIE):
             'channel': 'billlie',
             'channel_id': '72',
             'channel_url': 'https://weverse.io/billlie',
-            'creator': 'Billlie',
+            'creators': ['Billlie'],
             'timestamp': 1666262062,
             'upload_date': '20221020',
             'release_timestamp': 1666262058,

@@ -222,7 +226,7 @@ class WeverseIE(WeverseBaseIE):
             'channel': 'lesserafim',
             'channel_id': '47',
             'channel_url': 'https://weverse.io/lesserafim',
-            'creator': 'LE SSERAFIM',
+            'creators': ['LE SSERAFIM'],
             'timestamp': 1659353400,
             'upload_date': '20220801',
             'release_timestamp': 1659353400,

@@ -286,7 +290,7 @@ def _real_extract(self, url):

         elif live_status == 'is_live':
             video_info = self._call_api(
-                f'/video/v1.0/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
+                f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
                 video_id, note='Downloading live JSON')
             playback = self._parse_json(video_info['lipPlayback'], video_id)
             m3u8_url = traverse_obj(playback, (

@@ -302,7 +306,7 @@ def _real_extract(self, url):
         else:
             infra_video_id = post['extension']['video']['infraVideoId']
             in_key = self._call_api(
-                f'/video/v1.0/vod/{api_video_id}/inKey?preview=false', video_id,
+                f'/video/v1.1/vod/{api_video_id}/inKey?preview=false', video_id,
                 data=b'{}', note='Downloading VOD API key')['inKey']

             video_info = self._download_json(

@@ -347,7 +351,6 @@ class WeverseMediaIE(WeverseBaseIE):
     _VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)'
     _TESTS = [{
         'url': 'https://weverse.io/billlie/media/4-116372884',
-        'md5': '8efc9cfd61b2f25209eb1a5326314d28',
         'info_dict': {
             'id': 'e-C9wLSQs6o',
             'ext': 'mp4',

@@ -358,8 +361,9 @@ class WeverseMediaIE(WeverseBaseIE):
             'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg',
             'uploader': 'Billlie',
             'uploader_id': '@Billlie',
-            'uploader_url': 'http://www.youtube.com/@Billlie',
+            'uploader_url': 'https://www.youtube.com/@Billlie',
             'upload_date': '20230403',
+            'timestamp': 1680533992,
             'duration': 211,
             'age_limit': 0,
             'playable_in_embed': True,

@@ -372,6 +376,8 @@ class WeverseMediaIE(WeverseBaseIE):
             'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg',
             'categories': ['Entertainment'],
             'tags': 'count:7',
+            'channel_is_verified': True,
+            'heatmap': 'count:100',
         },
     }, {
         'url': 'https://weverse.io/billlie/media/3-102914520',

@@ -386,7 +392,7 @@ class WeverseMediaIE(WeverseBaseIE):
             'channel': 'billlie',
             'channel_id': '72',
             'channel_url': 'https://weverse.io/billlie',
-            'creator': 'Billlie',
+            'creators': ['Billlie'],
             'timestamp': 1662174000,
             'upload_date': '20220903',
             'release_timestamp': 1662174000,

@@ -432,7 +438,7 @@ class WeverseMomentIE(WeverseBaseIE):
             'uploader_id': '66a07e164b56a696ee71c99315ffe27b',
             'channel': 'secretnumber',
             'channel_id': '56',
-            'creator': 'SECRET NUMBER',
+            'creators': ['SECRET NUMBER'],
             'duration': 10,
             'upload_date': '20230405',
             'timestamp': 1680653968,

@@ -441,7 +447,6 @@ class WeverseMomentIE(WeverseBaseIE):
             'comment_count': int,
             'availability': 'needs_auth',
         },
-        'skip': 'Moment has expired',
     }]

     def _real_extract(self, url):

@@ -571,7 +576,7 @@ class WeverseLiveIE(WeverseBaseIE):
             'channel': 'purplekiss',
             'channel_id': '35',
             'channel_url': 'https://weverse.io/purplekiss',
-            'creator': 'PURPLE KISS',
+            'creators': ['PURPLE KISS'],
             'timestamp': 1680780892,
             'upload_date': '20230406',
             'release_timestamp': 1680780883,

@@ -584,6 +589,31 @@ class WeverseLiveIE(WeverseBaseIE):
             'live_status': 'is_live',
         },
         'skip': 'Livestream has ended',
     }, {
+        'url': 'https://weverse.io/lesserafim',
+        'info_dict': {
+            'id': '4-181521628',
+            'ext': 'mp4',
+            'title': r're:심심해서요',
+            'description': '',
+            'uploader': '채채🤎',
+            'uploader_id': 'd49b8b06f3cc1d92d655b25ab27ac2e7',
+            'channel': 'lesserafim',
+            'channel_id': '47',
+            'creators': ['LE SSERAFIM'],
+            'channel_url': 'https://weverse.io/lesserafim',
+            'timestamp': 1728570273,
+            'upload_date': '20241010',
+            'release_timestamp': 1728570264,
+            'release_date': '20241010',
+            'thumbnail': r're:https://phinf\.wevpstatic\.net/.+\.png',
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+            'availability': 'needs_auth',
+            'live_status': 'is_live',
+        },
+        'skip': 'Livestream has ended',
+    }, {
         'url': 'https://weverse.io/billlie/',
         'only_matching': True,

@@ -1357,7 +1357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
     }
     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
-    _DEFAULT_CLIENTS = ('ios', 'web_creator')
+    _DEFAULT_CLIENTS = ('ios', 'mweb')

     _GEO_BYPASS = False

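With this change the second default client is `mweb` instead of `web_creator`. A specific client can still be forced through extractor arguments; a minimal sketch using the Python API (the video URL is only a placeholder):

    import yt_dlp

    # equivalent to: yt-dlp --extractor-args "youtube:player_client=web_creator" URL
    ydl_opts = {'extractor_args': {'youtube': {'player_client': ['web_creator']}}}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info('https://www.youtube.com/watch?v=dQw4w9WgXcQ', download=False)
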
@@ -5,6 +5,7 @@
 import importlib.util
 import inspect
 import itertools
+import os
 import pkgutil
 import sys
 import traceback

@@ -137,6 +138,8 @@ def load_module(module, module_name, suffix):

 def load_plugins(name, suffix):
     classes = {}
+    if os.environ.get('YTDLP_NO_PLUGINS'):
+        return classes

     for finder, module_name, _ in iter_modules(name):
         if any(x.startswith('_') for x in module_name.split('.')):
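The new environment check provides a global kill switch for plugin loading. Any non-empty value works; a small sketch (the variable has to be set before yt_dlp starts loading plugins):

    import os

    # short-circuits load_plugins() per the check above
    os.environ['YTDLP_NO_PLUGINS'] = '1'

    import yt_dlp  # noqa: E402
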
@@ -26,19 +26,21 @@ class XAttrMetadataPP(PostProcessor):
     XATTR_MAPPING = {
         'user.xdg.referrer.url': 'webpage_url',
-        # 'user.xdg.comment': 'description',
         'user.dublincore.title': 'title',
         'user.dublincore.date': 'upload_date',
-        'user.dublincore.description': 'description',
         'user.dublincore.contributor': 'uploader',
         'user.dublincore.format': 'format',
+        # We do this last because it may get us close to the xattr limits
+        # (e.g., 4kB on ext4), and we don't want to have the other ones fail
+        'user.dublincore.description': 'description',
+        # 'user.xdg.comment': 'description',
     }

     def run(self, info):
         mtime = os.stat(info['filepath']).st_mtime
         self.to_screen('Writing metadata to file\'s xattrs')
         try:
             for xattrname, infoname in self.XATTR_MAPPING.items():
                 try:
                     value = info.get(infoname)
                     if value:
                         if infoname == 'upload_date':

@@ -51,9 +53,9 @@ def run(self, info):
                     if e.reason == 'NO_SPACE':
                         self.report_warning(
                             'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
-                            'Some extended attributes are not written')
+                            f'Extended attribute "{xattrname}" was not written.')
                     elif e.reason == 'VALUE_TOO_LONG':
-                        self.report_warning('Unable to write extended attributes due to too long values.')
+                        self.report_warning(f'Unable to write extended attribute "{xattrname}" due to too long values.')
                     else:
                         tip = ('You need to use NTFS' if compat_os_name == 'nt'
                                else 'You may have to enable them in your "/etc/fstab"')
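With the per-attribute warnings above it is now clear which key failed. The values written by this postprocessor can be inspected afterwards; a sketch using the Linux-only `os` xattr API (the path is hypothetical):

    import os

    path = 'video.mp4'  # hypothetical file downloaded with --xattrs
    for name in os.listxattr(path):
        # e.g. user.dublincore.title, user.xdg.referrer.url, ...
        print(name, os.getxattr(path, name).decode('utf-8', 'replace'))
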
@@ -664,31 +664,51 @@ def replace_insane(char):
     return result


+def _sanitize_path_parts(parts):
+    sanitized_parts = []
+    for part in parts:
+        if not part or part == '.':
+            continue
+        elif part == '..':
+            if sanitized_parts and sanitized_parts[-1] != '..':
+                sanitized_parts.pop()
+            sanitized_parts.append('..')
+            continue
+        # Replace invalid segments with `#`
+        # - trailing dots and spaces (`asdf...` => `asdf..#`)
+        # - invalid chars (`<>` => `##`)
+        sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
+        sanitized_parts.append(sanitized_part)
+
+    return sanitized_parts
+
+
 def sanitize_path(s, force=False):
     """Sanitizes and normalizes path on Windows"""
-    if sys.platform == 'win32':
-        force = False
-        drive_or_unc, _ = os.path.splitdrive(s)
-    elif force:
-        drive_or_unc = ''
-    else:
-        return s
-
-    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
-    if drive_or_unc:
-        norm_path.pop(0)
-    sanitized_path = [
-        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
-        for path_part in norm_path]
-    if drive_or_unc:
-        sanitized_path.insert(0, drive_or_unc + os.path.sep)
-    elif force and s and s[0] == os.path.sep:
-        sanitized_path.insert(0, os.path.sep)
-    # TODO: Fix behavioral differences <3.12
-    # The workaround using `normpath` only superficially passes tests
-    # Ref: https://github.com/python/cpython/pull/100351
-    return os.path.normpath(os.path.join(*sanitized_path))
+    # XXX: this handles drive relative paths (c:sth) incorrectly
+    if sys.platform != 'win32':
+        if not force:
+            return s
+        root = '/' if s.startswith('/') else ''
+        return root + '/'.join(_sanitize_path_parts(s.split('/')))
+
+    normed = s.replace('/', '\\')
+
+    if normed.startswith('\\\\'):
+        # UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
+        parts = normed.split('\\')
+        root = '\\'.join(parts[:4]) + '\\'
+        parts = parts[4:]
+    elif normed[1:2] == ':':
+        # absolute path or drive relative path
+        offset = 3 if normed[2:3] == '\\' else 2
+        root = normed[:offset]
+        parts = normed[offset:].split('\\')
+    else:
+        # relative/drive root relative path
+        root = '\\' if normed[:1] == '\\' else ''
+        parts = normed.split('\\')
+
+    return root + '\\'.join(_sanitize_path_parts(parts))


 def sanitize_url(url, *, scheme='http'):
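A couple of illustrative calls for the new implementation (expected results are derived from the code above; the first call assumes Windows, the second a POSIX system):

    from yt_dlp.utils import sanitize_path

    # UNC root is preserved; invalid characters in later segments become `#`
    sanitize_path(r'\\SERVER\share\fol*der\file?.mp4')
    # -> r'\\SERVER\share\fol#der\file#.mp4'

    # on POSIX, paths are left alone unless force=True
    sanitize_path('artist/tit<le./song.mp4', force=True)
    # -> 'artist/tit#le#/song.mp4'
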
@@ -804,14 +824,18 @@ class Popen(subprocess.Popen):
     _startupinfo = None

     @staticmethod
-    def _fix_pyinstaller_ld_path(env):
-        """Restore LD_LIBRARY_PATH when using PyInstaller
-        Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
-             https://github.com/yt-dlp/yt-dlp/issues/4573
-        """
+    def _fix_pyinstaller_issues(env):
         if not hasattr(sys, '_MEIPASS'):
             return

+        # Force spawning independent subprocesses for exes bundled with PyInstaller>=6.10
+        # Ref: https://pyinstaller.org/en/v6.10.0/CHANGES.html#incompatible-changes
+        #      https://github.com/yt-dlp/yt-dlp/issues/11259
+        env['PYINSTALLER_RESET_ENVIRONMENT'] = '1'
+
+        # Restore LD_LIBRARY_PATH when using PyInstaller
+        # Ref: https://pyinstaller.org/en/v6.10.0/runtime-information.html#ld-library-path-libpath-considerations
+        #      https://github.com/yt-dlp/yt-dlp/issues/4573
         def _fix(key):
             orig = env.get(f'{key}_ORIG')
             if orig is None:

@@ -825,7 +849,7 @@ def _fix(key):
     def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
         if env is None:
             env = os.environ.copy()
-        self._fix_pyinstaller_ld_path(env)
+        self._fix_pyinstaller_issues(env)

         self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
         if text is True:
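The rename is transparent to callers, since the fix-ups run inside `Popen.__init__` whenever the environment is built. For illustration, the usual call pattern (a sketch assuming `ffmpeg` is on PATH):

    import subprocess

    from yt_dlp.utils import Popen

    # Popen.run returns (stdout, stderr, returncode)
    stdout, _, returncode = Popen.run(
        ['ffmpeg', '-version'], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
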
@@ -1964,11 +1988,30 @@ def urljoin(base, path):
     return urllib.parse.urljoin(base, path)


-def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
+def partial_application(func):
+    sig = inspect.signature(func)
+
+    @functools.wraps(func)
+    def wrapped(*args, **kwargs):
+        try:
+            sig.bind(*args, **kwargs)
+        except TypeError:
+            return functools.partial(func, *args, **kwargs)
+        else:
+            return func(*args, **kwargs)
+
+    return wrapped
+
+
+@partial_application
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
     if get_attr and v is not None:
         v = getattr(v, get_attr, None)
+    if invscale == 1 and scale < 1:
+        invscale = int(1 / scale)
+        scale = 1
     try:
-        return int(v) * invscale // scale
+        return (int(v) if base is None else int(v, base=base)) * invscale // scale
     except (ValueError, TypeError, OverflowError):
         return default
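The decorator turns a call that is missing required positional arguments into a `functools.partial`, which is convenient inside traversal paths. A small sketch of the resulting behavior:

    from yt_dlp.utils import int_or_none

    # all required arguments present -> called immediately
    int_or_none('ff', base=16)   # 255

    # `v` missing -> returns a partial that can be applied later,
    # e.g. as `{int_or_none(scale=1000)}` inside a traverse_obj path
    ms_to_s = int_or_none(scale=1000)
    ms_to_s('1500')              # 1  (1500 ms -> 1 s, floor division)
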
@@ -1986,9 +2029,13 @@ def str_to_int(int_str):
     return int_or_none(int_str)


+@partial_application
 def float_or_none(v, scale=1, invscale=1, default=None):
     if v is None:
         return default
+    if invscale == 1 and scale < 1:
+        invscale = int(1 / scale)
+        scale = 1
     try:
         return float(v) * invscale / scale
     except (ValueError, TypeError):

@@ -1,18 +1,35 @@
 from __future__ import annotations

+import collections
 import collections.abc
 import contextlib
 import functools
 import http.cookies
 import inspect
 import itertools
 import re
 import typing
 import xml.etree.ElementTree

 from ._utils import (
     IDENTITY,
     NO_DEFAULT,
     ExtractorError,
     LazyList,
     deprecation_warning,
+    get_elements_html_by_class,
+    get_elements_html_by_attribute,
+    get_elements_by_attribute,
+    get_element_html_by_attribute,
+    get_element_by_attribute,
+    get_element_html_by_id,
+    get_element_by_id,
+    get_element_html_by_class,
+    get_elements_by_class,
+    get_element_text_and_html_by_tag,
     is_iterable_like,
     try_call,
+    url_or_none,
     variadic,
 )
@@ -54,6 +71,7 @@ def traverse_obj(
             Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
         - `any`-builtin: Take the first matching object and return it, resetting branching.
         - `all`-builtin: Take all matching objects and return them as a list, resetting branching.
+        - `filter`-builtin: Return the value if it is truthy, `None` otherwise.

         `tuple`, `list`, and `dict` all support nested paths and branches.

@@ -247,6 +265,10 @@ def apply_path(start_obj, path, test_type):
             objs = (list(filtered_objs),)
             continue

+        if key is filter:
+            objs = filter(None, objs)
+            continue
+
         if __debug__ and callable(key):
             # Verify function signature
             inspect.signature(key).bind(None, None)
@@ -277,13 +299,143 @@ def _traverse_obj(obj, path, allow_empty, test_type):
         return results[0] if results else {} if allow_empty and is_dict else None

     for index, path in enumerate(paths, 1):
-        result = _traverse_obj(obj, path, index == len(paths), True)
+        is_last = index == len(paths)
+        try:
+            result = _traverse_obj(obj, path, is_last, True)
             if result is not None:
                 return result
+        except _RequiredError as e:
+            if is_last:
+                # Reraise to get cleaner stack trace
+                raise ExtractorError(e.orig_msg, expected=e.expected) from None

     return None if default is NO_DEFAULT else default


+def value(value, /):
+    return lambda _: value
+
+
+def require(name, /, *, expected=False):
+    def func(value):
+        if value is None:
+            raise _RequiredError(f'Unable to extract {name}', expected=expected)
+
+        return value
+
+    return func
+
+
+class _RequiredError(ExtractorError):
+    pass
+
+
+@typing.overload
+def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
+
+
+@typing.overload
+def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ...
+
+
+def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
+    """
+    Convert subtitles from a traversal into a subtitle dict.
+    The path should have an `all` immediately before this function.
+
+    Arguments:
+    `ext`        The default value for `ext` in the subtitle dict
+
+    In the dict you can set the following additional items:
+    `id`         The subtitle id to sort the dict into
+    `quality`    The sort order for each subtitle
+    """
+    if subs is None:
+        return functools.partial(subs_list_to_dict, ext=ext)
+
+    result = collections.defaultdict(list)
+
+    for sub in subs:
+        if not url_or_none(sub.get('url')) and not sub.get('data'):
+            continue
+        sub_id = sub.pop('id', None)
+        if sub_id is None:
+            continue
+        if ext is not None and not sub.get('ext'):
+            sub['ext'] = ext
+        result[sub_id].append(sub)
+    result = dict(result)
+
+    for subs in result.values():
+        subs.sort(key=lambda x: x.pop('quality', 0) or 0)
+
+    return result
+
+
+@typing.overload
+def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
+
+
+@typing.overload
+def find_element(*, cls: str, html=False): ...
+
+
+@typing.overload
+def find_element(*, id: str, tag: str | None = None, html=False): ...
+
+
+@typing.overload
+def find_element(*, tag: str, html=False): ...
+
+
+def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
+    # deliberately using `id=` and `cls=` for ease of readability
+    assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
+    if not tag:
+        tag = r'[\w:.-]+'
+
+    if attr and value:
+        assert not cls, 'Cannot match both attr and cls'
+        assert not id, 'Cannot match both attr and id'
+        func = get_element_html_by_attribute if html else get_element_by_attribute
+        return functools.partial(func, attr, value, tag=tag)
+
+    elif cls:
+        assert not id, 'Cannot match both cls and id'
+        assert tag is None, 'Cannot match both cls and tag'
+        func = get_element_html_by_class if html else get_elements_by_class
+        return functools.partial(func, cls)
+
+    elif id:
+        func = get_element_html_by_id if html else get_element_by_id
+        return functools.partial(func, id, tag=tag)
+
+    index = int(bool(html))
+    return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
+
+
+@typing.overload
+def find_elements(*, cls: str, html=False): ...
+
+
+@typing.overload
+def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
+
+
+def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
+    # deliberately using `cls=` for ease of readability
+    assert cls or (attr and value), 'One of cls or (attr AND value) is required'
+
+    if attr and value:
+        assert not cls, 'Cannot match both attr and cls'
+        func = get_elements_html_by_attribute if html else get_elements_by_attribute
+        return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+')
+
+    assert not tag, 'Cannot match both cls and tag'
+    func = get_elements_html_by_class if html else get_elements_by_class
+    return functools.partial(func, cls)
+
+
 def get_first(obj, *paths, **kwargs):
     return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)

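Taken together, the new helpers let extraction code express fallbacks, required fields, and subtitle collection in a single declarative path. A hedged sketch of how they compose (the input dict is invented for illustration):

    from yt_dlp.utils.traversal import require, subs_list_to_dict, traverse_obj

    api_response = {
        'items': [{'id': ''}, {'id': 'abc123'}],
        'subs': [{'url': 'https://example.com/en.vtt', 'id': 'en'}],
    }

    # `filter` drops falsy matches, `any` unwraps the branch, and
    # `require` raises ExtractorError if nothing survived
    video_id = traverse_obj(api_response, ('items', ..., 'id', filter, any, {require('video id')}))

    # `all` collects the branch into a list, which subs_list_to_dict regroups by `id`
    subtitles = traverse_obj(api_response, ('subs', ..., {dict}, all, {subs_list_to_dict(ext='vtt')}))
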
@@ -1,8 +1,8 @@
 # Autogenerated by devscripts/update-version.py

-__version__ = '2024.09.27'
+__version__ = '2024.10.07'

-RELEASE_GIT_HEAD = 'c6387abc1af9842bb0541288a5610abba9b1ab51'
+RELEASE_GIT_HEAD = '1a176d874e6772cd898ce507379ea388e96ee3f7'

 VARIANT = None

@@ -12,4 +12,4 @@

 ORIGIN = 'yt-dlp/yt-dlp'

-_pkg_version = '2024.09.27'
+_pkg_version = '2024.10.07'