From e3ea47908747bff4b46b4000fb1de944b400c21a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 6 Sep 2013 16:24:24 +0200 Subject: [PATCH] [youtube] Fix some issues with the detection of playlist/channel urls (reported in #1374) They were being caught by YoutubeUserIE, now it only extracts a url if the rest of the extractors aren't suitable. Now the url tests check that the urls can only be extracted with a specific extractor. --- test/test_all_urls.py | 27 ++++++++++++++++----------- youtube_dl/extractor/youtube.py | 8 ++++++-- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 219c453afb..5d8d93e0e9 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -21,14 +21,15 @@ def assertMatch(self, url, ie_list): self.assertEqual(self.matching_ies(url), ie_list) def test_youtube_playlist_matching(self): - self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) - self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585 - self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958')) - self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')) - self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) - self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')) - self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 - self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M')) + assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) + assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585 + assertPlaylist(u'PL63F0C78739B09958') + 
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668 + self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M')) def test_youtube_matching(self): self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M')) @@ -37,9 +38,10 @@ def test_youtube_matching(self): self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) def test_youtube_channel_matching(self): - self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM')) - self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')) - self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')) + assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') def test_youtube_user_matching(self): self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) @@ -50,6 +52,9 @@ def test_youtube_feeds(self): self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) + def test_youtube_show_matching(self): + self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) + def test_justin_tv_channelid_matching(self): self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 
62aecea022..423a5e973c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -386,7 +386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False + if YoutubePlaylistIE.suitable(url): return False return re.match(cls._VALID_URL, url, re.VERBOSE) is not None def report_video_webpage_download(self, video_id): @@ -1021,8 +1021,12 @@ class YoutubeUserIE(InfoExtractor): _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' IE_NAME = u'youtube:user' + @classmethod def suitable(cls, url): - if YoutubeIE.suitable(url) or YoutubeFavouritesIE.suitable(url): return False + # Don't return True if the url can be extracted with another youtube + # extractor, the regex is too permissive and it would match. + other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls) + if any(ie.suitable(url) for ie in other_ies): return False else: return super(YoutubeUserIE, cls).suitable(url) def _real_extract(self, url):