Handle removed content

This commit is contained in:
ringus1 2024-01-11 16:02:41 +01:00
parent 4f58aabe67
commit 6be151fed7

View File

@@ -434,12 +434,12 @@ def _extract_from_url(self, url, video_id):
webpage = self._download_webpage( webpage = self._download_webpage(
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
if (self.get_param("username") and self.get_param("password")) or self.get_param("cookiefile"): if (self.get_param("username") and self.get_param("password")) or self.get_param("cookiefile"):
if 'We\'ve suspended your account' in webpage: if 'We\'ve suspended your account' in webpage:
raise ExtractorError('Login account is suspended.', expected=True) raise ExtractorError('Login account is suspended.', expected=True)
sjs_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
userinfo = get_first(sjs_data, ('require', ..., ..., ..., "__bbox", "define", lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v)) userinfo = get_first(sjs_data, ('require', ..., ..., ..., "__bbox", "define", lambda _, v: 'CurrentUserInitialData' in v, lambda _, v: 'ACCOUNT_ID' in v))
try: try:
user_id = int(userinfo['ACCOUNT_ID']) user_id = int(userinfo['ACCOUNT_ID'])
@@ -448,6 +448,10 @@ def _extract_from_url(self, url, video_id):
if user_id == 0: if user_id == 0:
raise ExtractorError('Failed to login with provided data.', expected=True) raise ExtractorError('Failed to login with provided data.', expected=True)
if props := get_first(sjs_data, ('require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., 'rootView', 'props'), expected_type=dict, default={}):
if props.get('title') == 'This content isn\'t available at the moment':
raise ExtractorError('Content removed. Facebook said: "%s"' % props.get('body', ''), expected=True)
def extract_metadata(webpage): def extract_metadata(webpage):
post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall( post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)] r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]