From 0d067e77c3f5527946fb0c22ee1c7011994cba40 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:16:17 -0500 Subject: [PATCH] [ie/dangalplay] Add extractors (#10021) Closes #8258 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/dangalplay.py | 197 ++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 yt_dlp/extractor/dangalplay.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b807728ee3..973f8c3213 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -453,6 +453,10 @@ DamtomoRecordIE, DamtomoVideoIE, ) +from .dangalplay import ( + DangalPlayIE, + DangalPlaySeasonIE, +) from .daum import ( DaumIE, DaumClipIE, diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py new file mode 100644 index 0000000000..50e4136b57 --- /dev/null +++ b/yt_dlp/extractor/dangalplay.py @@ -0,0 +1,197 @@ +import hashlib +import json +import re +import time + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none +from ..utils.traversal import traverse_obj + + +class DangalPlayBaseIE(InfoExtractor): + _NETRC_MACHINE = 'dangalplay' + _OTV_USER_ID = None + _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _API_BASE = 'https://ottapi.dangalplay.com' + _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js + _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above + + def _perform_login(self, username, password): + if self._OTV_USER_ID: + return + if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + raise ExtractorError(self._LOGIN_HINT, expected=True) + self._OTV_USER_ID = password + + def _real_initialize(self): + if not self._OTV_USER_ID: + self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None) + + def _extract_episode_info(self, metadata, episode_slug, series_slug): + return { + 'display_id': episode_slug, + 'episode_number': int_or_none(self._search_regex( + r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)), + 'season_number': int_or_none(self._search_regex( + r'season-(\d+)', series_slug, 'season number', default='1')), + 'series': series_slug, + **traverse_obj(metadata, { + 'id': ('content_id', {str}), + 'title': ('display_title', {str}), + 'episode': ('title', {str}), + 'series': ('show_name', {str}, {lambda x: x or None}), + 'series_id': ('catalog_id', {str}), + 'duration': ('duration', {int_or_none}), + 'release_timestamp': ('release_date_uts', {int_or_none}), + }), + } + + def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}): + return self._download_json( + f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, + headers={'Accept': 'application/json'}, query={ + 'auth_token': self._AUTH_TOKEN, + 'region': 'IN', + **query, + }) + + +class DangalPlayIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P[^/?#]+)/(?P(?!episodes)[^/?#]+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01', + 'info_dict': { + 'id': '647c61dc1e7171310dcd49b4', + 'ext': 'mp4', + 'release_timestamp': 1262304000, + 'episode_number': 1, + 'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2', + 'series': 'kitani-mohabbat-hai-season-2', + 'season_number': 2, + 'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2', + 'release_date': '20100101', + 'duration': 2325, + 'season': 'Season 2', + 'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01', + 'series_id': '645c9ea41e717158ca574966', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01', + 'info_dict': { + 'id': '65d31d9ba73b9c3abd14a7f3', + 'ext': 'mp4', + 'episode': 'EP 1 | MILKE BHI HUM NA MILE', + 'release_timestamp': 1708367411, + 'episode_number': 1, + 'season': 'Season 1', + 'title': 'EP 1 | MILKE BHI HUM NA MILE', + 'duration': 156048, + 'release_date': '20240219', + 'season_number': 1, + 'series': 'MILKE BHI HUM NA MILE', + 'series_id': '645c9ea41e717158ca574966', + 'display_id': 'milke-bhi-hum-na-mile-ep-number-01', + }, + }] + + def _generate_api_data(self, data): + catalog_id = data['catalog_id'] + content_id = data['content_id'] + timestamp = str(int(time.time())) + unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY)) + + return json.dumps({ + 'catalog_id': catalog_id, + 'content_id': content_id, + 'category': '', + 'region': 'IN', + 'auth_token': self._AUTH_TOKEN, + 'id': self._OTV_USER_ID, + 'md5': hashlib.md5(unhashed.encode()).hexdigest(), + 'ts': timestamp, + }, separators=(',', ':')).encode() + + def _real_extract(self, url): + series_slug, episode_slug = self._match_valid_url(url).group('series', 'id') + metadata = self._call_api( + f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip', + episode_slug, query={'item_language': ''})['data'] + + try: + details = self._download_json( + f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug, + 'Downloading playback details JSON', headers={ + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }, data=self._generate_api_data(metadata))['data'] + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 422: + error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} + if error_info.get('code') == '1016': + self.raise_login_required( + f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) + elif msg := error_info.get('message'): + raise ExtractorError(msg) + raise + + m3u8_url = traverse_obj(details, ( + ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any)) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4') + + return { + 'formats': formats, + 'subtitles': subtitles, + **self._extract_episode_info(metadata, episode_slug, series_slug), + } + + +class DangalPlaySeasonIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay:season' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P[^/?#]+)(?:/(?Pep-[^/?#]+)/episodes)?/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1', + 'playlist_mincount': 170, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes', + 'playlist_count': 30, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1', + }, + }, { + # 1 season only, series page is season page + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile', + 'playlist_mincount': 15, + 'info_dict': { + 'id': 'milke-bhi-hum-na-mile', + }, + }] + + def _entries(self, subcategories, series_slug): + for subcategory in subcategories: + data = self._call_api( + f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip', + series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={ + 'order_by': 'asc', + 'status': 'published', + }) + for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])): + episode_slug = ep['friendly_id'] + yield self.url_result( + f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}', + DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug)) + + def _real_extract(self, url): + series_slug, subcategory = self._match_valid_url(url).group('id', 'sub') + subcategories = [subcategory] if subcategory else traverse_obj( + self._call_api( + f'catalogs/shows/items/{series_slug}.gzip', series_slug, + 'Downloading season info JSON', query={'item_language': ''}), + ('data', 'subcategories', ..., 'friendly_id', {str})) + + return self.playlist_result( + self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory))