From 4be9f8c814c328213c8f60ecbb1e4d4e205b950e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Apr 2014 01:25:24 +0200 Subject: [PATCH] [ninegag] Add support for p/ URLs --- test/test_all_urls.py | 1 + youtube_dl/extractor/ninegag.py | 34 +++++++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 479d21b054..4de2c395ea 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -172,5 +172,6 @@ def test_yahoo_https(self): 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', ['Yahoo']) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index b8c892ccef..3352b83301 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -7,9 +7,14 @@ class NineGagIE(InfoExtractor): IE_NAME = '9gag' - _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)' + _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/ + (?: + v/(?P<numid>[0-9]+)| + p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+) + ) + ''' - _TEST = { + _TESTS = [{ "url": "http://9gag.tv/v/1912", "info_dict": { "id": "1912", @@ -20,17 +25,33 @@ class NineGagIE(InfoExtractor): "thumbnail": "re:^https?://", }, 'add_ie': ['Youtube'] - } + }, + { + 'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', + 'info_dict': { + 'id': 'KklwM', + 'ext': 'mp4', + 'display_id': 'alternate-banned-opening-scene-of-gravity', + "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. 
The ending is priceless.", + 'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie", + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = mobj.group('numid') or mobj.group('id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) youtube_id = self._html_search_regex( r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', webpage, 'video ID') + title = self._html_search_regex( + r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"', + webpage, 'title', default=None) + if not title: + title = self._og_search_title(webpage) description = self._html_search_regex( r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, 'description', fatal=False) @@ -46,7 +67,8 @@ def _real_extract(self, url): 'url': youtube_id, 'ie_key': 'Youtube', 'id': video_id, - 'title': self._og_search_title(webpage), + 'display_id': display_id, + 'title': title, 'description': description, 'view_count': view_count, 'thumbnail': self._og_search_thumbnail(webpage),