[extractor/NiconicoSeries] Fix extraction (#6898)

Authored by: sqrtNOT
This commit is contained in:
sqrtNOT 2023-04-25 10:21:06 +00:00 committed by GitHub
parent 9b30cd3dfc
commit c86e433c35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -660,10 +660,10 @@ def _real_extract(self, url):
class NiconicoSeriesIE(InfoExtractor): class NiconicoSeriesIE(InfoExtractor):
IE_NAME = 'niconico:series' IE_NAME = 'niconico:series'
_VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)' _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.nicovideo.jp/series/110226', 'url': 'https://www.nicovideo.jp/user/44113208/series/110226',
'info_dict': { 'info_dict': {
'id': '110226', 'id': '110226',
'title': 'ご立派ァ!のシリーズ', 'title': 'ご立派ァ!のシリーズ',
@@ -683,7 +683,7 @@ class NiconicoSeriesIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id) webpage = self._download_webpage(url, list_id)
title = self._search_regex( title = self._search_regex(
(r'<title>「(.+)(全', (r'<title>「(.+)(全',
@@ -691,10 +691,9 @@ def _real_extract(self, url):
webpage, 'title', fatal=False) webpage, 'title', fatal=False)
if title: if title:
title = unescapeHTML(title) title = unescapeHTML(title)
playlist = [ json_data = next(self._yield_json_ld(webpage, None, fatal=False))
self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id) return self.playlist_from_matches(
for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)] traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE)
return self.playlist_result(playlist, list_id, title)
class NiconicoHistoryIE(NiconicoPlaylistBaseIE): class NiconicoHistoryIE(NiconicoPlaylistBaseIE):