From 0468a3b3253957bfbeb98b4a7c71542ff80e9e06 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 11 Oct 2022 07:59:27 +0530 Subject: [PATCH] [jsinterp] Improve separating regex Fixes https://github.com/yt-dlp/yt-dlp/issues/4635#issuecomment-1273974909 --- test/test_jsinterp.py | 5 +++++ test/test_youtube_signature.py | 4 ++++ yt_dlp/extractor/youtube.py | 2 +- yt_dlp/jsinterp.py | 6 ++++-- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 92ef532f56..3c4391c4ab 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -392,6 +392,11 @@ def test_regex(self): ''') self.assertEqual(jsi.call_function('x').pattern, r',][}",],()}(\[)') + jsi = JSInterpreter(R''' + function x() { let a=[/[)\\]/]; return a[0]; } + ''') + self.assertEqual(jsi.call_function('x').pattern, r'[)\\]') + def test_char_code_at(self): jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}') self.assertEqual(jsi.call_function('x', 0), 116) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index c3dcb4d68f..6d753fbf09 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -130,6 +130,10 @@ 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', ), + ( + 'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js', + 'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w', + ), ] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 6f153bb3cf..35e41753a2 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2832,7 +2832,7 @@ def _decrypt_nsig(self, s, video_id, player_url): self.report_warning( f'Native nsig extraction failed: Trying with PhantomJS\n' f' n = {s} ; player = {player_url}', video_id) - self.write_debug(e) + self.write_debug(e, only_once=True) args, func_body = func_code ret = jsi.execute( diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 4caad6f743..e25997129d 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -236,7 +236,7 @@ def _regex_flags(cls, expr): @staticmethod def _separate(expr, delim=',', max_split=None): - OP_CHARS = '+-*/%&|^=<>!,;{}:' + OP_CHARS = '+-*/%&|^=<>!,;{}:[' if not expr: return counters = {k: 0 for k in _MATCHING_PARENS.values()} @@ -246,7 +246,9 @@ def _separate(expr, delim=',', max_split=None): if not in_quote and char in _MATCHING_PARENS: counters[_MATCHING_PARENS[char]] += 1 elif not in_quote and char in counters: - counters[char] -= 1 + # Something's wrong if we get negative, but ignore it anyway + if counters[char]: + counters[char] -= 1 elif not escaping: if char in _QUOTES and in_quote in (char, None): if in_quote or after_op or char != '/':