From c196640ff177022ec18150849e4f0de668468569 Mon Sep 17 00:00:00 2001
From: jhwgh1968
Date: Tue, 10 Aug 2021 13:51:12 +0000
Subject: [PATCH] [eroprofile] Add album downloader (#658)

Authored by: jhwgh1968
---
 yt_dlp/extractor/eroprofile.py | 47 ++++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |  5 +++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py
index c460dc7f98..54ed9a49e2 100644
--- a/yt_dlp/extractor/eroprofile.py
+++ b/yt_dlp/extractor/eroprofile.py
@@ -90,3 +90,50 @@ def _real_extract(self, url):
             'title': title,
             'age_limit': 18,
         })
+
+
+class EroProfileAlbumIE(InfoExtractor):
+    """Extract every video linked from an EroProfile album as a playlist."""
+
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
+    IE_NAME = 'EroProfile:album'
+
+    _TESTS = [{
+        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
+        'info_dict': {
+            'id': 'BBW-2-893',
+            'title': 'BBW 2'
+        },
+        'playlist_mincount': 486,
+    },
+    ]
+
+    def _extract_from_page(self, page):
+        """Yield a url_result for each /m/videos/view/ link found in the page HTML."""
+        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
+            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
+
+    def _entries(self, playlist_id, first_page):
+        """Yield the entries of the first page, then those of each further album page."""
+        yield from self._extract_from_page(first_page)
+
+        # The album slug comes from the URL, so escape it before embedding it in
+        # a pattern; otherwise characters such as '.' or '+' act as regex syntax.
+        page_urls = re.findall(
+            rf'href=".*?(/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+))"', first_page)
+
+        # The first pagination link is skipped; the remaining ones are fetched in
+        # the order they appear in the first page.
+        for url, n in page_urls[1:]:
+            yield from self._extract_from_page(self._download_webpage(
+                f'https://www.eroprofile.com{url}',
+                playlist_id, note=f'Downloading playlist page {int(n) - 1}'))
+
+    def _real_extract(self, url):
+        """Download the album page and return a playlist of the videos it links to."""
+        playlist_id = self._match_id(url)
+        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
+        playlist_title = self._search_regex(
+            r'Album: (.*) - EroProfile', first_page, 'playlist_title')
+
+        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 975fb03288..5b15bb8e74 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -389,7 +389,10 @@
 from .embedly import EmbedlyIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
-from .eroprofile import EroProfileIE
+from .eroprofile import (
+    EroProfileIE,
+    EroProfileAlbumIE,
+)
 from .escapist import EscapistIE
 from .espn import (
     ESPNIE,