Compare commits

...

3 Commits

Author SHA1 Message Date
dirkf
ae8ba2c319 [YouTube] Fix KeyError QV in signature extraction failed
* temporarily force missing global definition into sig JS
* improve test: thanks https://github.com/yt-dlp/yt-dlp/issues/7327#issuecomment-1595274615
* resolves #32314
2023-06-17 15:55:19 +01:00
dirkf
d6433cbb2c [jsinterp] Don't find unrelated objects 2023-06-17 15:46:12 +01:00
dirkf
ff75c300f5 [jsinterp] Fix test for failed match in extract_object() 2023-06-17 15:34:11 +01:00
3 changed files with 19 additions and 10 deletions

View File

@@ -63,6 +63,11 @@ _SIG_TESTS = [
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
),
(
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
) )
] ]
@@ -231,7 +236,7 @@ def n_sig(jscode, sig_input):
make_sig_test = t_factory( make_sig_test = t_factory(
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) 'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
for test_spec in _SIG_TESTS: for test_spec in _SIG_TESTS:
make_sig_test(*test_spec) make_sig_test(*test_spec)

View File

@@ -1569,8 +1569,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig') jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode) # temporary (please) hack for player 6ed0d907 #32314
ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};'
jsi = JSInterpreter(ah + jscode)
initial_function = jsi.extract_function(funcname) initial_function = jsi.extract_function(funcname)
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url): def _decrypt_signature(self, s, video_id, player_url):

View File

@@ -941,15 +941,15 @@ class JSInterpreter(object):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
obj = {} obj = {}
obj_m = re.search( obj_m = re.search(
r'''(?x) r'''(?xs)
(?<!this\.)%s\s*=\s*{\s* (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*) (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
}\s*; }}\s*);
''' % (re.escape(objname), _FUNC_NAME_RE), '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
self.code) self.code)
if not obj_m: fields = obj_m and obj_m.group('fields')
if fields is None:
raise self.Exception('Could not find object ' + objname) raise self.Exception('Could not find object ' + objname)
fields = obj_m.group('fields')
# Currently, it only supports function definitions # Currently, it only supports function definitions
fields_m = re.finditer( fields_m = re.finditer(
r'''(?x) r'''(?x)
@@ -985,9 +985,9 @@ class JSInterpreter(object):
\((?P<args>[^)]*)\)\s* \((?P<args>[^)]*)\)\s*
(?P<code>{.+})''' % {'name': re.escape(funcname)}, (?P<code>{.+})''' % {'name': re.escape(funcname)},
self.code) self.code)
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
if func_m is None: if func_m is None:
raise self.Exception('Could not find JS function "{funcname}"'.format(**locals())) raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
return self.build_arglist(func_m.group('args')), code return self.build_arglist(func_m.group('args')), code
def extract_function(self, funcname): def extract_function(self, funcname):