[sohu] fix extractor conflict

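Certain URLs would match both SohuPlaylistIE and SohuIE, causing a conflict between the two extractors; anchoring SohuPlaylistIE's _VALID_URL with `$` restricts it to bare playlist URLs.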
This commit is contained in:
Yukan Zhang 2021-06-10 00:58:34 -07:00
parent 22c0a90a7a
commit 69a40d3eb0

View File

@ -262,7 +262,7 @@ class SohuIE(InfoExtractor):
class SohuPlaylistIE(InfoExtractor): class SohuPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:my\.)?tv\.sohu\.com/pl/(?P<pl_id>\d+)' _VALID_URL = r'https?://(?:my\.)?tv\.sohu\.com/pl/(?P<pl_id>\d+)$'
_URL_IN_PLAYLIST = re.compile(r'<strong>.*?</strong>') _URL_IN_PLAYLIST = re.compile(r'<strong>.*?</strong>')
parser = HTMLAttributeParser() parser = HTMLAttributeParser()
_TESTS = [{ _TESTS = [{
@ -311,14 +311,13 @@ class SohuPlaylistIE(InfoExtractor):
def _get_all_pages_in_playlist(self, first_page, url): def _get_all_pages_in_playlist(self, first_page, url):
pgcount = int(self._search_regex(r'var pgcount = \'(\d+)\'', first_page, 'pgcount')) pgcount = int(self._search_regex(r'var pgcount = \'(\d+)\'', first_page, 'pgcount'))
pgsize = int(self._search_regex(r'var pgsize = \'(\d+)\'', first_page, 'pgsize')) pgsize = int(self._search_regex(r'var pgsize = \'(\d+)\'', first_page, 'pgsize'))
return [url + '/index%d.shtml' % (i+1) for i in range(0, math.ceil(pgcount/pgsize))] return [url + '/index%d.shtml' % (i + 1) for i in range(0, math.ceil(pgcount / pgsize))]
def _get_video_list(self, all_pages, playlist_id): def _get_video_list(self, all_pages, playlist_id):
video_list = '' video_list = ''
for i, url in enumerate(all_pages): for i, url in enumerate(all_pages):
webpage = self._download_webpage(url, "playlist " + playlist_id + " page: %d" % (1+i)) webpage = self._download_webpage(url, "playlist " + playlist_id + " page: %d" % (1 + i))
video_list += self._search_regex( video_list += self._search_regex(
r'<ul class="uList cfix">(.*?)</ul>', r'<ul class="uList cfix">(.*?)</ul>',
webpage, 'video list', NO_DEFAULT, True, re.DOTALL) webpage, 'video list', NO_DEFAULT, True, re.DOTALL)
return video_list return video_list