Determine merge container better (See desc) (#1482)

* Determine the container early. Closes #4069
* Use codecs instead of just file extensions
* Obey `--prefer-free-formats`
* Allow fallbacks in `--merge-output-format`

Authored by: pukkandan, selfisekai
This commit is contained in:
Lauren N. Liberda 2022-08-04 02:42:12 +02:00 committed by GitHub
parent fe0918bb65
commit fc61aff41b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 84 additions and 39 deletions

View File

@ -858,10 +858,10 @@ ## Video Format Options:
downloadable
-F, --list-formats List available formats of each video.
Simulate unless --no-simulate is used
--merge-output-format FORMAT Container to use when merging formats (e.g.
bestvideo+bestaudio). Ignored if no merge is
required. (currently supported: avi, flv,
mkv, mov, mp4, webm)
--merge-output-format FORMAT Containers that may be used when merging
formats, separated by "/" (Eg: "mp4/mkv").
Ignored if no merge is required. (currently
supported: avi, flv, mkv, mov, mp4, webm)
## Subtitle Options:
--write-subs Write subtitle file

View File

@ -53,6 +53,7 @@
fix_xml_ampersands,
float_or_none,
format_bytes,
get_compatible_ext,
get_element_by_attribute,
get_element_by_class,
get_element_html_by_attribute,
@ -1843,6 +1844,31 @@ def test_determine_file_encoding(self):
self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
def test_get_compatible_ext(self):
    """Check the container chosen by get_compatible_ext for a table of stream combinations."""
    # Each row: (vcodecs, acodecs, vexts, aexts, extra keyword arguments, expected container)
    cases = (
        # Unknown codecs: the decision falls back to the file extensions
        ([None], [None, None], ['mp4'], ['m4a', 'm4a'], {}, 'mkv'),
        ([None], [None], ['flv'], ['flv'], {}, 'flv'),
        ([None], [None], ['mp4'], ['m4a'], {}, 'mp4'),
        ([None], [None], ['mp4'], ['webm'], {}, 'mkv'),
        ([None], [None], ['webm'], ['m4a'], {}, 'mkv'),
        ([None], [None], ['webm'], ['webm'], {}, 'webm'),
        # Known codecs take precedence over the extensions
        (['h264'], ['mp4a'], ['mov'], ['m4a'], {}, 'mp4'),
        (['av01.0.12M.08'], ['opus'], ['mp4'], ['webm'], {}, 'webm'),
        # Explicit preferences restrict the candidates
        (['vp9'], ['opus'], ['webm'], ['webm'], {'preferences': ['flv', 'mp4']}, 'mp4'),
        (['av1'], ['mp4a'], ['webm'], ['m4a'], {'preferences': ('webm', 'mkv')}, 'mkv'),
    )
    for vcodecs, acodecs, vexts, aexts, extra, expected in cases:
        self.assertEqual(get_compatible_ext(
            vcodecs=vcodecs, acodecs=acodecs, vexts=vexts, aexts=aexts, **extra), expected)
if __name__ == '__main__':
unittest.main()

View File

@ -102,6 +102,7 @@
format_decimal_suffix,
format_field,
formatSeconds,
get_compatible_ext,
get_domain,
int_or_none,
iri_to_uri,
@ -134,6 +135,7 @@
timetuple_from_msec,
to_high_limit_path,
traverse_obj,
try_call,
try_get,
url_basename,
variadic,
@ -372,7 +374,7 @@ class YoutubeDL:
Progress hooks are guaranteed to be called at least twice
(with status "started" and "finished") if the processing is successful.
merge_output_format: Extension to use when merging formats.
merge_output_format: "/" separated list of extensions to use when merging formats.
final_ext: Expected final extension; used to detect when the file was
already downloaded and converted
fixup: Automatically correct known faults of the file.
@ -2088,14 +2090,13 @@ def _merge(formats_pair):
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
output_ext = self.params.get('merge_output_format')
if not output_ext:
if the_only_video:
output_ext = the_only_video['ext']
elif the_only_audio and not video_fmts:
output_ext = the_only_audio['ext']
else:
output_ext = 'mkv'
output_ext = get_compatible_ext(
vcodecs=[f.get('vcodec') for f in video_fmts],
acodecs=[f.get('acodec') for f in audio_fmts],
vexts=[f['ext'] for f in video_fmts],
aexts=[f['ext'] for f in audio_fmts],
preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
@ -3067,33 +3068,9 @@ def existing_video_file(*filepaths):
return
if info_dict.get('requested_formats') is not None:
def compatible_formats(formats):
    """Return True if the extensions of all given formats belong to one compatible family.

    Returns False when more than two formats carry video, or more than two
    carry audio (a codec value other than 'none' counts as carrying it).
    """
    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
    video_count = sum(1 for fmt in formats if fmt.get('vcodec') != 'none')
    audio_count = sum(1 for fmt in formats if fmt.get('acodec') != 'none')
    if max(video_count, audio_count) > 2:
        return False

    # Check extension
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
        {'webm'},
    )
    used_exts = {fmt.get('ext') for fmt in formats}
    # TODO: Check acodec/vcodec
    return any(family.issuperset(used_exts) for family in COMPATIBLE_EXTS)
requested_formats = info_dict['requested_formats']
old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None:
if not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv')
if (info_dict['ext'] == 'webm'
and info_dict.get('thumbnails')
# check with type instead of pp_key, __name__, or isinstance

View File

@ -228,7 +228,8 @@ def validate_minmax(min_val, max_val, min_name, max_name=None):
validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
# Postprocessor formats
validate_in('merge output format', opts.merge_output_format, FFmpegMergerPP.SUPPORTED_EXTS)
validate_regex('merge output format', opts.merge_output_format,
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)

View File

@ -782,7 +782,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'--merge-output-format',
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
help=(
'Container to use when merging formats (e.g. bestvideo+bestaudio). Ignored if no merge is required. '
'Containers that may be used when merging formats, separated by "/" (Eg: "mp4/mkv"). '
'Ignored if no merge is required. '
f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
video_format.add_option(
'--allow-unplayable-formats',

View File

@ -3456,6 +3456,46 @@ def parse_codecs(codecs_str):
return {}
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose a container extension that can hold the given streams once merged.

    vcodecs/acodecs are the codec strings of the video/audio streams (entries
    may be None when unknown) and vexts/aexts are the matching file extensions;
    each codec list must be as long as its extension list. Either pair may be
    empty, e.g. when only audio streams are merged.

    preferences is an optional ordered sequence of candidate extensions. When
    it is falsy, every known container is a candidate and 'mkv' is the
    fallback. When it is given and nothing in it is found compatible, its last
    entry is returned.

    Only the first video and first audio codec are inspected.
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: Not all codecs supported by parse_codecs are handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
            'h264', 'aacl',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def sanitize_codec(codecs):
        # Reduce the first codec to its bare lower-case name with zeros removed
        # (e.g. "av01.0.12M.08" -> "av1"). Indexing happens inside the guard so
        # that an empty list or a None entry yields None instead of raising.
        try:
            return codecs[0].split('.')[0].replace('0', '').lower()
        except (AttributeError, IndexError, KeyError, TypeError):
            return None

    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    # First pass: decide by codec. An unknown (None) codec never matches here,
    # which defers the decision to the extension-based pass below.
    for ext in preferences or COMPATIBLE_CODECS.keys():
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # Second pass: decide by the extensions of the source files
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
def urlhandle_detect_ext(url_handle):
getheader = url_handle.headers.get