diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index eee2a4a2dd..cd936e7e5f 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -89,8 +89,12 @@ def __init__(self, parser): )) +# While the specification +# prescribes that hours must be *2 or more* digits, timestamps with a single +# digit for the hour part have been seen in the wild. +# See https://github.com/yt-dlp/yt-dlp/issues/921 _REGEX_TS = re.compile(r'''(?x) - (?:([0-9]{2,}):)? + (?:([0-9]{1,}):)? ([0-9]{2}): ([0-9]{2})\. ([0-9]{3})? @@ -172,6 +176,7 @@ class Magic(HeaderBlock): _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=') _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:') _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)') + _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*') @classmethod def __parse_tsmap(cls, parser): @@ -194,7 +199,7 @@ def __parse_tsmap(cls, parser): raise ParseError(parser) else: raise ParseError(parser) - if parser.consume(','): + if parser.consume(cls._REGEX_TSMAP_SEP): continue if parser.consume(_REGEX_NL): break