Add InstagramIE (related #904)

2024-11-23 11:31:47 +01:00 · 2013-07-01 21:08:54 +02:00 · 2013-07-01 21:08:54 +02:00 · 59fc531f78
commit 59fc531f78
parent 5c44c15438
2 changed files with 43 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -25,6 +25,7 @@ from .howcast import HowcastIE
 from .hypem import HypemIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .keek import KeekIE
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -0,0 +1,42 @@
 import re
 from .common import InfoExtractor
 class InstagramIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
    _TEST = {
        u'url': u'http://instagram.com/p/aye83DjauH/#',
        u'file': u'aye83DjauH.mp4',
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq", 
            u"title": u"Video by naomipq"
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        video_url = self._html_search_regex(
            r'<meta property="og:video" content="(.+?)"',
            webpage, u'video URL')
        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)" />',
            webpage, u'thumbnail URL', fatal=False)
        html_title = self._html_search_regex(
            r'<title>(.+?)</title>',
            webpage, u'title', flags=re.DOTALL)
        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
        uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram',
            webpage, u'uploader name', fatal=False)
        ext = 'mp4'
        return [{
            'id':        video_id,
            'url':       video_url,
            'ext':       ext,
            'title':     title,
            'thumbnail': thumbnail_url,
            'uploader_id' : uploader_id
        }]