From 1dd6d9ca9d9d23525a4f00eb851d6e72ef52c4c7 Mon Sep 17 00:00:00 2001
From: zenerdi0de <83358565+zenerdi0de@users.noreply.github.com>
Date: Mon, 30 Aug 2021 03:17:50 +0530
Subject: [PATCH] [Patreon] Add PatreonUserIE (#573)

Authored by: zenerdi0de
---
 yt_dlp/extractor/extractors.py |  5 ++-
 yt_dlp/extractor/patreon.py    | 57 +++++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index da5716ad1f..c52eb2635e 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1006,7 +1006,10 @@
 )
 from .parliamentliveuk import ParliamentLiveUKIE
 from .parlview import ParlviewIE
-from .patreon import PatreonIE
+from .patreon import (
+    PatreonIE,
+    PatreonUserIE
+)
 from .pbs import PBSIE
 from .pearvideo import PearVideoIE
 from .peertube import PeerTubeIE
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index 7bd892fa56..a189c0237d 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
+
 from .common import InfoExtractor
 from .vimeo import VimeoIE
 
@@ -14,7 +16,7 @@
     parse_iso8601,
     str_or_none,
     try_get,
-    url_or_none
+    url_or_none,
 )
 
 
@@ -185,3 +187,56 @@ def _real_extract(self, url):
                 })
 
         return info
+
+
+class PatreonUserIE(InfoExtractor):
+    """Extract every post of a Patreon user page as a playlist of PatreonIE URLs."""
+    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>[-_\w\d]+)/?(?:posts/?)?'
+
+    _TESTS = [{
+        'url': 'https://www.patreon.com/dissonancepod/',
+        'info_dict': {
+            'title': 'dissonancepod',
+        },
+        'playlist_mincount': 68,
+        'expected_warnings': 'Post not viewable by current user! Skipping!',
+    }, {
+        'url': 'https://www.patreon.com/dissonancepod/posts',
+        'only_matching': True
+    }, ]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if PatreonIE.suitable(url) else super(PatreonUserIE, cls).suitable(url)
+
+    def _entries(self, campaign_id, user_id):
+        cursor = None
+        params = {
+            'fields[campaign]': 'show_audio_post_download_links,name,url',
+            'fields[post]': 'current_user_can_view,embed,image,is_paid,post_file,published_at,patreon_url,url,post_type,thumbnail_url,title',
+            'filter[campaign_id]': campaign_id,
+            'filter[is_draft]': 'false',
+            'sort': '-published_at',
+            'json-api-version': 1.0,
+            'json-api-use-default-includes': 'false',
+        }
+
+        for page in itertools.count(1):
+            # Add the cursor parameter only when the previous page supplied one.
+            params.update({'page[cursor]': cursor} if cursor else {})
+            posts_json = self._download_json('https://www.patreon.com/api/posts', user_id, note='Downloading posts page %d' % page, query=params, headers={'Cookie': '.'})
+            # Remember the next-page cursor; the loop stops below when there is none.
+            cursor = try_get(posts_json, lambda x: x['meta']['pagination']['cursors']['next'])
+
+            for post in posts_json.get('data') or []:
+                yield self.url_result(url_or_none(try_get(post, lambda x: x['attributes']['patreon_url'])), 'Patreon')
+
+            if cursor is None:
+                break
+
+    def _real_extract(self, url):
+        """Find the campaign ID in the user page, then return that campaign's posts as a playlist."""
+        user_id = self._match_id(url)
+        webpage = self._download_webpage(url, user_id, headers={'Cookie': '.'})
+        campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')
+        return self.playlist_result(self._entries(campaign_id, user_id), playlist_title=user_id)