[ie/Newgrounds:user] Fix extractor (#9046)

Closes #7308
Authored by: u-spec-png
This commit is contained in:
u-spec-png 2024-01-21 19:50:14 +01:00 committed by GitHub
parent 9f1e9dab21
commit 3e083191cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -3,15 +3,15 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
OnDemandPagedList,
clean_html, clean_html,
extract_attributes, extract_attributes,
get_element_by_id, get_element_by_id,
int_or_none, int_or_none,
parse_count, parse_count,
parse_duration, parse_duration,
traverse_obj,
unified_timestamp, unified_timestamp,
OnDemandPagedList,
try_get,
) )
@@ -263,19 +263,16 @@ class NewgroundsUserIE(InfoExtractor):
def _fetch_page(self, channel_id, url, page): def _fetch_page(self, channel_id, url, page):
page += 1 page += 1
posts_info = self._download_json( posts_info = self._download_json(
f'{url}/page/{page}', channel_id, f'{url}?page={page}', channel_id,
note=f'Downloading page {page}', headers={ note=f'Downloading page {page}', headers={
'Accept': 'application/json, text/javascript, */*; q = 0.01', 'Accept': 'application/json, text/javascript, */*; q = 0.01',
'X-Requested-With': 'XMLHttpRequest', 'X-Requested-With': 'XMLHttpRequest',
}) })
sequence = posts_info.get('sequence', []) for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
for year in sequence: path, media_id = self._search_regex(
posts = try_get(posts_info, lambda x: x['years'][str(year)]['items']) r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
for post in posts: post, 'url', group=(1, 2))
path, media_id = self._search_regex( yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
post, 'url', group=(1, 2))
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)