diff --git a/test/test_utils.py b/test/test_utils.py index 270669044f..f3fbff042c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 # Allow direct execution import os @@ -21,6 +22,8 @@ find_xpath_attr, get_meta_content, xpath_with_ns, + smuggle_url, + unsmuggle_url, ) if sys.version_info < (3, 0): @@ -155,5 +158,18 @@ def test_xpath_with_ns(self): self.assertEqual(find('media:song/media:author').text, u'The Author') self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + def test_smuggle_url(self): + data = {u"ö": u"ö", u"abc": [3]} + url = 'https://foo.bar/baz?x=y#a' + smug_url = smuggle_url(url, data) + unsmug_url, unsmug_data = unsmuggle_url(smug_url) + self.assertEqual(url, unsmug_url) + self.assertEqual(data, unsmug_data) + + res_url, res_data = unsmuggle_url(url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, None) + + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d48c84f8d5..89805250cf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,8 @@ compat_urlparse, ExtractorError, + smuggle_url, + unescapeHTML, ) from .brightcove import BrightcoveIE @@ -29,6 +31,17 @@ class GenericIE(InfoExtractor): u"title": u"R\u00e9gis plante sa Jeep" } }, + # embedded vimeo video + { + u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references', + u'file': u'22444065.mp4', + u'md5': u'2903896e23df39722c33f015af0666e2', + u'info_dict': { + u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011', + u"uploader_id": u"skillsmatter", + u"uploader": u"Skills Matter", + } + } ] def report_download_webpage(self, video_id): @@ -127,6 +140,14 @@ def _real_extract(self, url): bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) return self.url_result(bc_url, 'Brightcove') + # Look for embedded Vimeo player + mobj = re.search( + r'