yt-dlp/youtube_dl/extractor/laola1tv.py

# coding: utf-8
from __future__ import unicode_literals

import json

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    unified_strdate,
    urlencode_postdata,
    xpath_element,
    xpath_text,
    urljoin,
    update_url_query,
    js_to_json,
)


class Laola1TvEmbedIE(InfoExtractor):
    """Extractor for laola1.tv ``titanplayer.php`` embed pages.

    The numeric ``videoid`` query parameter is used as the video id.  The
    helper methods ``_extract_token_url`` and ``_extract_formats`` are also
    reused by subclasses.
    """
    IE_NAME = 'laola1tv:embed'
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
    _TESTS = [{
        # flashvars.premium = "false";
        'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
        'info_dict': {
            'id': '708065',
            'ext': 'mp4',
            'title': 'MA Long CHN - FAN Zhendong CHN',
            'uploader': 'ITTF - International Table Tennis Federation',
            'upload_date': '20161211',
        },
    }]

    def _extract_token_url(self, stream_access_url, video_id, data):
        """Request stream access and return the token URL.

        ``data`` is serialized as JSON and sent as the request body to
        ``stream_access_url``; the first item of ``data -> stream-access``
        in the JSON response is returned.
        """
        if isinstance(data, bytes):
            # The embed code path hands over the result of
            # urlencode_postdata(), a byte string.  json.dumps() accepts
            # that on Python 2 but raises TypeError on Python 3, so
            # normalize to text first; the serialized payload stays the same.
            data = data.decode('utf-8')
        return self._download_json(
            stream_access_url, video_id, headers={
                'Content-Type': 'application/json',
            }, data=json.dumps(data).encode())['data']['stream-access'][0]

    def _extract_formats(self, token_url, video_id):
        """Download the token document and return the sorted formats.

        Raises ExtractorError when the token element is missing or its
        ``status`` attribute is not ``'0'``.
        """
        token_doc = self._download_xml(
            token_url, video_id, 'Downloading token',
            headers=self.geo_verification_headers())

        # fatal=True: report a missing <token> element as an extraction
        # error instead of failing with AttributeError on None.attrib.
        token_attrib = xpath_element(
            token_doc, './/token', 'token', fatal=True).attrib

        if token_attrib['status'] != '0':
            # .get(): a missing comment must not mask the token error
            # with a KeyError.
            raise ExtractorError(
                'Token error: %s' % token_attrib.get('comment'), expected=True)

        formats = self._extract_akamai_formats(
            '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
            video_id)
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Extract metadata and formats for an embed player URL."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        flash_vars = self._search_regex(
            r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')

        def get_flashvar(x, *args, **kwargs):
            # Look inside the ``flashvars = {...}`` object literal first and
            # fall back to ``flashvars.x = "..."`` / ``x = "..."`` assignments
            # anywhere in the page.  Extra arguments only affect the fallback.
            flash_var = self._search_regex(
                r'%s\s*:\s*"([^"]+)"' % x,
                flash_vars, x, default=None)
            if not flash_var:
                flash_var = self._search_regex([
                    r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
                    r'%s\s*=\s*"([^"]+)"' % x],
                    webpage, x, *args, **kwargs)
            return flash_var

        hd_doc = self._download_xml(
            'http://www.laola1.tv/server/hd_video.php', video_id, query={
                'play': get_flashvar('streamid'),
                'partner': get_flashvar('partnerid'),
                'portal': get_flashvar('portalid'),
                'lang': get_flashvar('sprache'),
                'v5ident': '',
            })

        # Text of a child element of <video> in the hd_video document.
        _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
        title = _v('title', fatal=True)

        token_url = None
        premium = get_flashvar('premium', default=None)
        # NOTE(review): any non-empty captured value is truthy here,
        # including the literal "false" mentioned in the test comment
        # above - confirm that this is the intended condition.
        if premium:
            token_url = update_url_query(
                _v('url', fatal=True), {
                    'timestamp': get_flashvar('timestamp'),
                    'auth': get_flashvar('auth'),
                })
        else:
            # A missing or empty <req_liga_abos> element yields None; treat
            # it as "no subscriptions" instead of crashing on None.split().
            req_abos = _v('req_liga_abos')
            abo_ids = req_abos.split(',') if req_abos else []
            data_abo = urlencode_postdata(
                dict((i, v) for i, v in enumerate(abo_ids)))
            stream_access_url = update_url_query(
                'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {
                    'videoId': _v('id'),
                    'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
                    'label': _v('label'),
                    'area': _v('area'),
                })
            token_url = self._extract_token_url(stream_access_url, video_id, data_abo)

        formats = self._extract_formats(token_url, video_id)

        categories_str = _v('meta_sports')
        categories = categories_str.split(',') if categories_str else []
        is_live = _v('islive') == 'true'

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'upload_date': unified_strdate(_v('time_date')),
            'uploader': _v('meta_organisation'),
            'categories': categories,
            'is_live': is_live,
            'formats': formats,
        }


class Laola1TvIE(Laola1TvEmbedIE):
    """Extractor for laola1.tv video and livestream pages.

    The last path component of the URL is used as ``display_id``; the real
    video id is read from the ``conf`` object embedded in the page.
    """
    IE_NAME = 'laola1tv'
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
        'info_dict': {
            'id': '227883',
            'display_id': 'straubing-tigers-koelner-haie',
            'ext': 'flv',
            'title': 'Straubing Tigers - Kölner Haie',
            'upload_date': '20140912',
            'is_live': False,
            'categories': ['Eishockey'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
        'info_dict': {
            'id': '464602',
            'display_id': 'straubing-tigers-koelner-haie',
            'ext': 'flv',
            'title': 'Straubing Tigers - Kölner Haie',
            'upload_date': '20160129',
            'is_live': False,
            'categories': ['Eishockey'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
        'info_dict': {
            'id': '487850',
            'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
            'ext': 'flv',
            'title': 'Belogorie BELGOROD - TRENTINO Diatec',
            'upload_date': '20160322',
            'uploader': 'CEV - Europäischer Volleyball Verband',
            'is_live': True,
            'categories': ['Volleyball'],
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This live stream has already finished.',
    }]

    def _real_extract(self, url):
        """Extract metadata and formats for a laola1.tv page URL.

        Raises ExtractorError when the page reports a finished livestream
        or the player configuration contains an ``error`` entry.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        if 'Dieser Livestream ist bereits beendet.' in webpage:
            raise ExtractorError('This live stream has already finished.', expected=True)

        # The page embeds a JavaScript object literal ``conf = {...};`` that
        # is converted to JSON before parsing.
        conf = self._parse_json(self._search_regex(
            r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
            display_id, js_to_json)

        video_id = conf['videoid']

        config = self._download_json(conf['configUrl'], video_id, query={
            'videoid': video_id,
            'partnerid': conf['partnerid'],
            'language': conf.get('language', ''),
            'portal': conf.get('portalid', ''),
        })
        error = config.get('error')
        if error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        video_data = config['video']
        title = video_data['title']
        is_live = video_data.get('isLivestream') and video_data.get('isLive')
        # metaInformation is read with .get(), i.e. treated as optional, so
        # fall back to an empty mapping instead of calling .get() on None.
        meta = video_data.get('metaInformation') or {}
        sports = meta.get('sports')
        categories = sports.split(',') if sports else []

        token_url = self._extract_token_url(
            video_data['streamAccess'], video_id,
            video_data['abo']['required'])

        formats = self._extract_formats(token_url, video_id)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._live_title(title) if is_live else title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('image'),
            'categories': categories,
            'formats': formats,
            'is_live': is_live,
        }
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`# coding: utf-8`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00			`from __future__ import unicode_literals`

[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`import json`

[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00			`from .common import InfoExtractor`
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`from ..utils import (`
[laola1tv] Improve extraction and update test case (#3742) 2015-02-23 19:45:36 +01:00			`ExtractorError,`
[laola1tv] Fixes for changed site layout. * Fixed valid URLs (w/ tests). * Fixed iframe URL extraction. * Fixed token URL extraction. * Fixed variable extraction. * Fixed uploader spelling. * Added upload_date to result dictionary. 2016-02-09 16:25:30 +01:00			`unified_strdate,`
			`urlencode_postdata,`
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`xpath_element,`
			`xpath_text,`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00			`urljoin,`
[laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00			`update_url_query,`
[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`js_to_json,`
[laola1tv] Improve extraction and update test case (#3742) 2015-02-23 19:45:36 +01:00			`)`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00

[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00			`class Laola1TvEmbedIE(InfoExtractor):`
[laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00			`IE_NAME = 'laola1tv:embed'`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00			`_VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'`
[laola1tv:embed] Fix tests 2017-05-04 17:41:47 +02:00			`_TESTS = [{`
[laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00			`# flashvars.premium = "false";`
			`'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',`
			`'info_dict': {`
			`'id': '708065',`
			`'ext': 'mp4',`
			`'title': 'MA Long CHN - FAN Zhendong CHN',`
			`'uploader': 'ITTF - International Table Tennis Federation',`
			`'upload_date': '20161211',`
			`},`
[laola1tv:embed] Fix tests 2017-05-04 17:41:47 +02:00			`}]`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00
[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`def _extract_token_url(self, stream_access_url, video_id, data):`
			`return self._download_json(`
			`stream_access_url, video_id, headers={`
			`'Content-Type': 'application/json',`
			`}, data=json.dumps(data).encode())['data']['stream-access'][0]`

			`def _extract_formats(self, token_url, video_id):`
			`token_doc = self._download_xml(`
			`token_url, video_id, 'Downloading token',`
			`headers=self.geo_verification_headers())`

			`token_attrib = xpath_element(token_doc, './/token').attrib`

			`if token_attrib['status'] != '0':`
			`raise ExtractorError(`
			`'Token error: %s' % token_attrib['comment'], expected=True)`

			`formats = self._extract_akamai_formats(`
			`'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),`
			`video_id)`
			`self._sort_formats(formats)`
			`return formats`

[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`
			`flash_vars = self._search_regex(`
			`r'(?s)flashvars\s=\s({.+?});', webpage, 'flash vars')`
[laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00
			`def get_flashvar(x, args, *kwargs):`
			`flash_var = self._search_regex(`
			`r'%s\s:\s"([^"]+)"' % x,`
			`flash_vars, x, default=None)`
			`if not flash_var:`
			`flash_var = self._search_regex([`
			`r'flashvars\.%s\s=\s"([^"]+)"' % x,`
			`r'%s\s=\s"([^"]+)"' % x],`
			`webpage, x, args, *kwargs)`
			`return flash_var`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00
			`hd_doc = self._download_xml(`
			`'http://www.laola1.tv/server/hd_video.php', video_id, query={`
			`'play': get_flashvar('streamid'),`
			`'partner': get_flashvar('partnerid'),`
			`'portal': get_flashvar('portalid'),`
			`'lang': get_flashvar('sprache'),`
			`'v5ident': '',`
			`})`

			`_v = lambda x, k: xpath_text(hd_doc, './/video/' + x, k)`
			`title = _v('title', fatal=True)`

[laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00			`token_url = None`
			`premium = get_flashvar('premium', default=None)`
			`if premium:`
			`token_url = update_url_query(`
			`_v('url', fatal=True), {`
			`'timestamp': get_flashvar('timestamp'),`
			`'auth': get_flashvar('auth'),`
			`})`
			`else:`
			`data_abo = urlencode_postdata(`
			`dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))`
[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`stream_access_url = update_url_query(`
			`'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {`
[laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00			`'videoId': _v('id'),`
			`'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),`
			`'label': _v('label'),`
			`'area': _v('area'),`
[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`})`
			`token_url = self._extract_token_url(stream_access_url, video_id, data_abo)`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00
[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`formats = self._extract_formats(token_url, video_id)`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00
			`categories_str = _v('meta_sports')`
			`categories = categories_str.split(',') if categories_str else []`
			`is_live = _v('islive') == 'true'`

			`return {`
			`'id': video_id,`
			`'title': self._live_title(title) if is_live else title,`
			`'upload_date': unified_strdate(_v('time_date')),`
			`'uploader': _v('meta_organisation'),`
			`'categories': categories,`
			`'is_live': is_live,`
			`'formats': formats,`
			`}`


[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`class Laola1TvIE(Laola1TvEmbedIE):`
[laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00			`IE_NAME = 'laola1tv'`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00			`_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'`
[laola1tv] Fixes for changed site layout. * Fixed valid URLs (w/ tests). * Fixed iframe URL extraction. * Fixed token URL extraction. * Fixed variable extraction. * Fixed uploader spelling. * Added upload_date to result dictionary. 2016-02-09 16:25:30 +01:00			`_TESTS = [{`
[laola1tv] Improve extraction and update test case (#3742) 2015-02-23 19:45:36 +01:00			`'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00			`'info_dict': {`
[laola1tv] Improve extraction and update test case (#3742) 2015-02-23 19:45:36 +01:00			`'id': '227883',`
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`'display_id': 'straubing-tigers-koelner-haie',`
			`'ext': 'flv',`
[laola1tv] Improve extraction and update test case (#3742) 2015-02-23 19:45:36 +01:00			`'title': 'Straubing Tigers - Kölner Haie',`
[laola1tv] Fixes for changed site layout. * Fixed valid URLs (w/ tests). * Fixed iframe URL extraction. * Fixed token URL extraction. * Fixed variable extraction. * Fixed uploader spelling. * Added upload_date to result dictionary. 2016-02-09 16:25:30 +01:00			`'upload_date': '20140912',`
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`'is_live': False,`
			`'categories': ['Eishockey'],`
[laola1tv] Fixes for changed site layout. * Fixed valid URLs (w/ tests). * Fixed iframe URL extraction. * Fixed token URL extraction. * Fixed variable extraction. * Fixed uploader spelling. * Added upload_date to result dictionary. 2016-02-09 16:25:30 +01:00			`},`
			`'params': {`
			`'skip_download': True,`
[laola1tv] Add support for livestreams (Closes #8934) 2016-03-22 17:32:59 +01:00			`},`
[laola1tv] Fixes for changed site layout. * Fixed valid URLs (w/ tests). * Fixed iframe URL extraction. * Fixed token URL extraction. * Fixed variable extraction. * Fixed uploader spelling. * Added upload_date to result dictionary. 2016-02-09 16:25:30 +01:00			`}, {`
			`'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',`
			`'info_dict': {`
			`'id': '464602',`
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`'display_id': 'straubing-tigers-koelner-haie',`
			`'ext': 'flv',`
[laola1tv] Fixes for changed site layout. * Fixed valid URLs (w/ tests). * Fixed iframe URL extraction. * Fixed token URL extraction. * Fixed variable extraction. * Fixed uploader spelling. * Added upload_date to result dictionary. 2016-02-09 16:25:30 +01:00			`'title': 'Straubing Tigers - Kölner Haie',`
			`'upload_date': '20160129',`
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`'is_live': False,`
			`'categories': ['Eishockey'],`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00			`},`
			`'params': {`
			`'skip_download': True,`
[laola1tv] Add support for livestreams (Closes #8934) 2016-03-22 17:32:59 +01:00			`},`
			`}, {`
			`'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',`
			`'info_dict': {`
			`'id': '487850',`
			`'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',`
			`'ext': 'flv',`
			`'title': 'Belogorie BELGOROD - TRENTINO Diatec',`
			`'upload_date': '20160322',`
			`'uploader': 'CEV - Europäischer Volleyball Verband',`
			`'is_live': True,`
			`'categories': ['Volleyball'],`
			`},`
			`'params': {`
			`'skip_download': True,`
			`},`
[laola1tv] Improve error detection and skip an invalid test 2016-04-14 08:11:28 +02:00			`'skip': 'This live stream has already finished.',`
[laola1tv] Fixes for changed site layout. * Fixed valid URLs (w/ tests). * Fixed iframe URL extraction. * Fixed token URL extraction. * Fixed variable extraction. * Fixed uploader spelling. * Added upload_date to result dictionary. 2016-02-09 16:25:30 +01:00			`}]`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00
			`def _real_extract(self, url):`
[laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00			`display_id = self._match_id(url)`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`webpage = self._download_webpage(url, display_id)`

[laola1tv] Improve error detection and skip an invalid test 2016-04-14 08:11:28 +02:00			`if 'Dieser Livestream ist bereits beendet.' in webpage:`
			`raise ExtractorError('This live stream has already finished.', expected=True)`

[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`conf = self._parse_json(self._search_regex(`
			`r'(?s)conf\s=\s({.+?});', webpage, 'conf'),`
			`display_id, js_to_json)`

			`video_id = conf['videoid']`

			`config = self._download_json(conf['configUrl'], video_id, query={`
			`'videoid': video_id,`
			`'partnerid': conf['partnerid'],`
			`'language': conf.get('language', ''),`
			`'portal': conf.get('portalid', ''),`
			`})`
			`error = config.get('error')`
			`if error:`
			`raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)`

			`video_data = config['video']`
			`title = video_data['title']`
			`is_live = video_data.get('isLivestream') and video_data.get('isLive')`
			`meta = video_data.get('metaInformation')`
			`sports = meta.get('sports')`
			`categories = sports.split(',') if sports else []`

			`token_url = self._extract_token_url(`
			`video_data['streamAccess'], video_id,`
			`video_data['abo']['required'])`

			`formats = self._extract_formats(token_url, video_id)`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00
			`return {`
[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`'id': video_id,`
[laola1tv] Improve (Closes #8478) 2016-02-14 18:40:26 +01:00			`'display_id': display_id,`
[laola1tv] fix extraction(closes #12880) 2017-05-04 17:06:21 +02:00			`'title': self._live_title(title) if is_live else title,`
			`'description': video_data.get('description'),`
			`'thumbnail': video_data.get('image'),`
			`'categories': categories,`
			`'formats': formats,`
			`'is_live': is_live,`
[laola1tv] Add new extractor The extractor works fine, but the f4m downloader cannot handle the resulting bootstrap information. 2014-10-28 16:51:32 +01:00			`}`