mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 07:28:19 +01:00
Compare commits
70 Commits
7e198495bd
...
5884691c5a
Author | SHA1 | Date | |
---|---|---|---|
|
5884691c5a | ||
|
4b5eec0aaa | ||
|
670bafe148 | ||
|
e05694e550 | ||
|
9dd8574b68 | ||
|
160d973aee | ||
|
c0be43d4d7 | ||
|
4f1af12b70 | ||
|
724a6cb2cb | ||
|
66a6e0a686 | ||
|
6208f7be9c | ||
|
6a84199473 | ||
|
54ad67d785 | ||
|
172dfbeaed | ||
|
cf96b24de6 | ||
|
50c943e8a0 | ||
|
6fc6349ef0 | ||
|
5156a16cf9 | ||
|
fb2b57a773 | ||
|
2741b5827d | ||
|
bd730470f2 | ||
|
194bc49c55 | ||
|
1416cee726 | ||
|
622c555356 | ||
|
99e6074c5d | ||
|
1f7974690e | ||
|
8ee942a9c8 | ||
|
444e02ef3b | ||
|
4e93198ae6 | ||
|
78285eea86 | ||
|
7f93eb7a28 | ||
|
128d30492b | ||
|
129555b19a | ||
|
01f672fe27 | ||
|
2fbe18557b | ||
|
b131f3d1f1 | ||
|
544836de83 | ||
|
6cea8cbe2d | ||
|
5e4699a623 | ||
|
79ae58a5c4 | ||
|
3faa1e33ed | ||
|
fbae888c65 | ||
|
cdac7641d6 | ||
|
a43ba2eff6 | ||
|
0ed9a73a73 | ||
|
e40132da09 | ||
|
e6e2eb00f1 | ||
|
9fc70f3f6d | ||
|
5ef1a928a7 | ||
|
db62ffdafe | ||
|
f137666451 | ||
|
e3ffdf76aa | ||
|
9f717b69b4 | ||
|
34d3df72e9 | ||
|
96f5d29db0 | ||
|
c222f6cbfc | ||
|
2d1655493f | ||
|
c376b95f95 | ||
|
8df470761e | ||
|
e3b08bac9c | ||
|
932758707f | ||
|
317ba03fdf | ||
|
e42e25619f | ||
|
fba1c397b1 | ||
|
b83d7526f2 | ||
|
fdb9aaf416 | ||
|
1799a6ae36 | ||
|
367429e238 | ||
|
439be2b4a4 | ||
|
2fbd6de957 |
|
@ -452,10 +452,15 @@ def test_unified_timestamps(self):
|
|||
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
||||
self.assertEqual(unified_timestamp('Sunday, 26 Nov 2006, 19:00'), 1164567600)
|
||||
self.assertEqual(unified_timestamp('wed, aug 16, 2008, 12:00pm'), 1218931200)
|
||||
self.assertEqual(unified_timestamp('2022-10-13T02:37:47.831Z'), 1665628667)
|
||||
|
||||
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
|
||||
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
|
||||
self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
|
||||
self.assertEqual(unified_timestamp('2023-03-09T18:01:33.646Z', with_milliseconds=True), 1678384893.646)
|
||||
# ISO8601 spec says that if no timezone is specified, we should use local timezone;
|
||||
# but yt-dlp uses UTC to keep things consistent
|
||||
self.assertEqual(unified_timestamp('2023-03-11T06:48:34.008'), 1678517314)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
|
|
|
@ -28,7 +28,12 @@
|
|||
from .compat import urllib # isort: split
|
||||
from .compat import urllib_req_to_req
|
||||
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
|
||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||
from .downloader import (
|
||||
DashSegmentsFD,
|
||||
FFmpegFD,
|
||||
get_suitable_downloader,
|
||||
shorten_protocol_name,
|
||||
)
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .extractor import gen_extractor_classes, get_info_extractor
|
||||
from .extractor.common import UnsupportedURLIE
|
||||
|
@ -3373,7 +3378,7 @@ def existing_video_file(*filepaths):
|
|||
fd, success = None, True
|
||||
if info_dict.get('protocol') or info_dict.get('url'):
|
||||
fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
|
||||
if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
|
||||
if fd not in [FFmpegFD, DashSegmentsFD] and 'no-direct-merge' not in self.params['compat_opts'] and (
|
||||
info_dict.get('section_start') or info_dict.get('section_end')):
|
||||
msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
|
||||
else 'You have requested downloading the video partially, but ffmpeg is not installed')
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
import optparse
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
|
||||
|
@ -340,12 +341,13 @@ def parse_chapters(name, value, advanced=False):
|
|||
(?P<end_sign>-?)(?P<end>[^-]+)
|
||||
)?'''
|
||||
|
||||
current_time = time.time()
|
||||
chapters, ranges, from_url = [], [], False
|
||||
for regex in value or []:
|
||||
if advanced and regex == '*from-url':
|
||||
from_url = True
|
||||
continue
|
||||
elif not regex.startswith('*'):
|
||||
elif not regex.startswith('*') and not regex.startswith('#'):
|
||||
try:
|
||||
chapters.append(re.compile(regex))
|
||||
except re.error as err:
|
||||
|
@ -362,11 +364,16 @@ def parse_chapters(name, value, advanced=False):
|
|||
err = 'Must be of the form "*start-end"'
|
||||
elif not advanced and any(signs):
|
||||
err = 'Negative timestamps are not allowed'
|
||||
else:
|
||||
elif regex.startswith('*'):
|
||||
dur[0] *= -1 if signs[0] else 1
|
||||
dur[1] *= -1 if signs[1] else 1
|
||||
if dur[1] == float('-inf'):
|
||||
err = '"-inf" is not a valid end'
|
||||
elif regex.startswith('#'):
|
||||
dur[0] = dur[0] * (-1 if signs[0] else 1) + current_time
|
||||
dur[1] = dur[1] * (-1 if signs[1] else 1) + current_time
|
||||
if dur[1] == float('-inf'):
|
||||
err = '"-inf" is not a valid end'
|
||||
if err:
|
||||
raise ValueError(f'invalid {name} time range "{regex}". {err}')
|
||||
ranges.append(dur)
|
||||
|
|
|
@ -36,6 +36,8 @@ def real_download(self, filename, info_dict):
|
|||
'filename': fmt.get('filepath') or filename,
|
||||
'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'),
|
||||
'total_frags': fragment_count,
|
||||
'section_start': info_dict.get('section_start'),
|
||||
'section_end': info_dict.get('section_end'),
|
||||
}
|
||||
|
||||
if real_downloader:
|
||||
|
|
|
@ -59,16 +59,15 @@ def _extract_from_api(self, video_id, tld):
|
|||
'Accept': 'application/json',
|
||||
}, fatal=False, impersonate=True) or {}
|
||||
|
||||
status = response.get('room_status')
|
||||
if status != 'public':
|
||||
if error := self._ERROR_MAP.get(status):
|
||||
raise ExtractorError(error, expected=True)
|
||||
self.report_warning('Falling back to webpage extraction')
|
||||
return None
|
||||
|
||||
m3u8_url = response.get('url')
|
||||
if not m3u8_url:
|
||||
self.raise_geo_restricted()
|
||||
status = response.get('room_status')
|
||||
if error := self._ERROR_MAP.get(status):
|
||||
raise ExtractorError(error, expected=True)
|
||||
if status == 'public':
|
||||
self.raise_geo_restricted()
|
||||
self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction')
|
||||
return None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -2733,7 +2733,7 @@ def extract_common(source):
|
|||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
't': int_or_none(s.get('t')),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
|
@ -2775,8 +2775,14 @@ def extract_Initialization(source):
|
|||
return ms_info
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
availability_start_time = unified_timestamp(
|
||||
mpd_doc.get('availabilityStartTime'), with_milliseconds=True) or 0
|
||||
stream_numbers = collections.defaultdict(int)
|
||||
for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
|
||||
# segmentIngestTime is completely out of spec, but YT Livestream do this
|
||||
segment_ingest_time = period.get('{http://youtube.com/yt/2012/10/10}segmentIngestTime')
|
||||
if segment_ingest_time:
|
||||
availability_start_time = unified_timestamp(segment_ingest_time, with_milliseconds=True)
|
||||
period_entry = {
|
||||
'id': period.get('id', f'period-{period_idx}'),
|
||||
'formats': [],
|
||||
|
@ -2955,13 +2961,17 @@ def add_segment_url():
|
|||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
duration = float_or_none(segment_d, representation_ms_info['timescale'])
|
||||
start = float_or_none(segment_time, representation_ms_info['timescale'])
|
||||
representation_ms_info['fragments'].append({
|
||||
media_location_key: segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
'duration': duration,
|
||||
'start': availability_start_time + start,
|
||||
'end': availability_start_time + start + duration,
|
||||
})
|
||||
|
||||
for s in representation_ms_info['s']:
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_time = s['t'] if s.get('t') is not None else segment_time
|
||||
segment_d = s['d']
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
|
@ -2977,6 +2987,7 @@ def add_segment_url():
|
|||
fragments = []
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
start = 0
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for _ in range(s.get('r', 0) + 1):
|
||||
|
@ -2984,8 +2995,11 @@ def add_segment_url():
|
|||
fragments.append({
|
||||
location_key(segment_uri): segment_uri,
|
||||
'duration': duration,
|
||||
'start': availability_start_time + start,
|
||||
'end': availability_start_time + start + duration,
|
||||
})
|
||||
segment_index += 1
|
||||
start += duration
|
||||
representation_ms_info['fragments'] = fragments
|
||||
elif 'segment_urls' in representation_ms_info:
|
||||
# Segment URLs with no SegmentTimeline
|
||||
|
|
|
@ -2869,17 +2869,17 @@ def refetch_manifest(format_id, delay):
|
|||
microformats = traverse_obj(
|
||||
prs, (..., 'microformat', 'playerMicroformatRenderer'),
|
||||
expected_type=dict)
|
||||
_, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
|
||||
is_live = live_status == 'is_live'
|
||||
start_time = time.time()
|
||||
with lock:
|
||||
_, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
|
||||
is_live = live_status == 'is_live'
|
||||
start_time = time.time()
|
||||
|
||||
def mpd_feed(format_id, delay):
|
||||
"""
|
||||
@returns (manifest_url, manifest_stream_number, is_live) or None
|
||||
"""
|
||||
for retry in self.RetryManager(fatal=False):
|
||||
with lock:
|
||||
refetch_manifest(format_id, delay)
|
||||
refetch_manifest(format_id, delay)
|
||||
|
||||
f = next((f for f in formats if f['format_id'] == format_id), None)
|
||||
if not f:
|
||||
|
@ -2910,6 +2910,11 @@ def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, m
|
|||
begin_index = 0
|
||||
download_start_time = ctx.get('start') or time.time()
|
||||
|
||||
section_start = ctx.get('section_start') or 0
|
||||
section_end = ctx.get('section_end') or math.inf
|
||||
|
||||
self.write_debug(f'Selected section: {section_start} -> {section_end}')
|
||||
|
||||
lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
|
||||
if lack_early_segments:
|
||||
self.report_warning(bug_reports_message(
|
||||
|
@ -2930,9 +2935,10 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
|
|||
or (mpd_url, stream_number, False))
|
||||
if not refresh_sequence:
|
||||
if expire_fast and not is_live:
|
||||
return False, last_seq
|
||||
return False
|
||||
elif old_mpd_url == mpd_url:
|
||||
return True, last_seq
|
||||
return True
|
||||
|
||||
if manifestless_orig_fmt:
|
||||
fmt_info = manifestless_orig_fmt
|
||||
else:
|
||||
|
@ -2943,14 +2949,13 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
|
|||
fmts = None
|
||||
if not fmts:
|
||||
no_fragment_score += 2
|
||||
return False, last_seq
|
||||
return False
|
||||
fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
|
||||
fragments = fmt_info['fragments']
|
||||
fragment_base_url = fmt_info['fragment_base_url']
|
||||
assert fragment_base_url
|
||||
|
||||
_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
|
||||
return True, _last_seq
|
||||
return True
|
||||
|
||||
self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
|
||||
while is_live:
|
||||
|
@ -2970,11 +2975,19 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
|
|||
last_segment_url = None
|
||||
continue
|
||||
else:
|
||||
should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
|
||||
should_continue = _extract_sequence_from_mpd(True, no_fragment_score > 15)
|
||||
no_fragment_score += 2
|
||||
if not should_continue:
|
||||
continue
|
||||
|
||||
last_fragment = fragments[-1]
|
||||
last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
|
||||
|
||||
known_fragment = next(
|
||||
(fragment for fragment in fragments if f'sq/{known_idx}' in fragment['path']), None)
|
||||
if known_fragment and known_fragment['end'] > section_end:
|
||||
break
|
||||
|
||||
if known_idx > last_seq:
|
||||
last_segment_url = None
|
||||
continue
|
||||
|
@ -2984,20 +2997,36 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
|
|||
if begin_index < 0 and known_idx < 0:
|
||||
# skip from the start when it's negative value
|
||||
known_idx = last_seq + begin_index
|
||||
|
||||
if lack_early_segments:
|
||||
known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
|
||||
known_idx = max(known_idx, last_seq - int(MAX_DURATION // last_fragment['duration']))
|
||||
|
||||
fragment_count = last_seq - known_idx if section_end == math.inf else int(
|
||||
(section_end - section_start) // last_fragment['duration'])
|
||||
|
||||
try:
|
||||
for idx in range(known_idx, last_seq):
|
||||
# do not update sequence here or you'll get skipped some part of it
|
||||
should_continue, _ = _extract_sequence_from_mpd(False, False)
|
||||
should_continue = _extract_sequence_from_mpd(False, False)
|
||||
if not should_continue:
|
||||
known_idx = idx - 1
|
||||
raise ExtractorError('breaking out of outer loop')
|
||||
last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
|
||||
yield {
|
||||
'url': last_segment_url,
|
||||
'fragment_count': last_seq,
|
||||
}
|
||||
|
||||
frag_duration = last_fragment['duration']
|
||||
frag_start = last_fragment['start'] - (last_seq - idx) * frag_duration
|
||||
frag_end = frag_start + frag_duration
|
||||
|
||||
if frag_start >= section_start and frag_end <= section_end:
|
||||
last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
|
||||
|
||||
yield {
|
||||
'url': last_segment_url,
|
||||
'fragment_count': fragment_count,
|
||||
'duration': frag_duration,
|
||||
'start': frag_start,
|
||||
'end': frag_end,
|
||||
}
|
||||
|
||||
if known_idx == last_seq:
|
||||
no_fragment_score += 5
|
||||
else:
|
||||
|
@ -4170,6 +4199,9 @@ def build_fragments(f):
|
|||
dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
|
||||
yield dct
|
||||
|
||||
if live_status == 'is_live' and self.get_param('download_ranges') and not self.get_param('live_from_start'):
|
||||
self.report_warning('For YT livestreams, --download-sections is only supported with --live-from-start')
|
||||
|
||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||
skip_bad_formats = 'incomplete' not in format_types
|
||||
if self._configuration_arg('include_incomplete_formats'):
|
||||
|
|
|
@ -429,7 +429,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
general.add_option(
|
||||
'--live-from-start',
|
||||
action='store_true', dest='live_from_start',
|
||||
help='Download livestreams from the start. Currently only supported for YouTube (Experimental)')
|
||||
help=('Download livestreams from the start. Currently only supported for YouTube (Experimental). '
|
||||
'Time ranges can be specified using --download-sections to download only a part of the stream. '
|
||||
'Negative values are allowed for specifying a relative previous time, using the # syntax '
|
||||
'e.g. --download-sections "#-24hours - 0" (download last 24 hours), '
|
||||
'e.g. --download-sections "#-1h - 30m" (download from 1 hour ago until the next 30 minutes), '
|
||||
'e.g. --download-sections "#-3days - -2days" (download from 3 days ago until 2 days ago). '
|
||||
'It is also possible to specify an exact unix timestamp range, using the * syntax, '
|
||||
'e.g. --download-sections "*1672531200 - 1672549200" (download between those two timestamps)'))
|
||||
general.add_option(
|
||||
'--no-live-from-start',
|
||||
action='store_false', dest='live_from_start',
|
||||
|
|
|
@ -1250,7 +1250,7 @@ def unified_strdate(date_str, day_first=True):
|
|||
return str(upload_date)
|
||||
|
||||
|
||||
def unified_timestamp(date_str, day_first=True):
|
||||
def unified_timestamp(date_str, day_first=True, with_milliseconds=False):
|
||||
if not isinstance(date_str, str):
|
||||
return None
|
||||
|
||||
|
@ -1276,7 +1276,7 @@ def unified_timestamp(date_str, day_first=True):
|
|||
for expression in date_formats(day_first):
|
||||
with contextlib.suppress(ValueError):
|
||||
dt_ = dt.datetime.strptime(date_str, expression) - timezone + dt.timedelta(hours=pm_delta)
|
||||
return calendar.timegm(dt_.timetuple())
|
||||
return calendar.timegm(dt_.timetuple()) + (dt_.microsecond / 1e6 if with_milliseconds else 0)
|
||||
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
|
@ -2071,16 +2071,19 @@ def parse_duration(s):
|
|||
|
||||
days, hours, mins, secs, ms = [None] * 5
|
||||
m = re.match(r'''(?x)
|
||||
(?P<sign>[+-])?
|
||||
(?P<before_secs>
|
||||
(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
|
||||
(?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
|
||||
(?P<ms>[.:][0-9]+)?Z?$
|
||||
''', s)
|
||||
if m:
|
||||
days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
|
||||
sign, days, hours, mins, secs, ms = m.group('sign', 'days', 'hours', 'mins', 'secs', 'ms')
|
||||
else:
|
||||
m = re.match(
|
||||
r'''(?ix)(?:P?
|
||||
r'''(?ix)(?:
|
||||
(?P<sign>[+-])?
|
||||
P?
|
||||
(?:
|
||||
[0-9]+\s*y(?:ears?)?,?\s*
|
||||
)?
|
||||
|
@ -2104,17 +2107,19 @@ def parse_duration(s):
|
|||
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
|
||||
)?Z?$''', s)
|
||||
if m:
|
||||
days, hours, mins, secs, ms = m.groups()
|
||||
sign, days, hours, mins, secs, ms = m.groups()
|
||||
else:
|
||||
m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
|
||||
m = re.match(r'(?i)(?P<sign>[+-])?(?:(?P<days>[0-9.]+)\s*(?:days?)|(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
|
||||
if m:
|
||||
hours, mins = m.groups()
|
||||
sign, days, hours, mins = m.groups()
|
||||
else:
|
||||
return None
|
||||
|
||||
sign = -1 if sign == '-' else 1
|
||||
|
||||
if ms:
|
||||
ms = ms.replace(':', '.')
|
||||
return sum(float(part or 0) * mult for part, mult in (
|
||||
return sign * sum(float(part or 0) * mult for part, mult in (
|
||||
(days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user