From 4d2498344271650b446286794d79dbbb7ebe64a5 Mon Sep 17 00:00:00 2001 From: Conner Luker Date: Wed, 21 Feb 2024 22:54:27 -0500 Subject: [PATCH 1/4] use m3u8 extractor --- yt_dlp/extractor/frontendmasters.py | 47 +++++++++++------------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 3bae8add0..8156a9429 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -19,11 +19,6 @@ class FrontendMastersBaseIE(InfoExtractor): _NETRC_MACHINE = 'frontendmasters' - _QUALITIES = { - 'low': {'width': 480, 'height': 360}, - 'mid': {'width': 1280, 'height': 720}, - 'high': {'width': 1920, 'height': 1080} - } def _perform_login(self, username, password): login_page = self._download_webpage( @@ -130,40 +125,32 @@ class FrontendMastersIE(FrontendMastersBaseIE): 'url': 'frontendmasters:a2qogef6ba', 'only_matching': True, }] - def _real_extract(self, url): lesson_id = self._match_id(url) - source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id) + source_url = f'{self._API_BASE}/video/{lesson_id}/source' + headers = { + 'Referer': 'https://frontendmasters.com/', + } + cookies = self._get_cookies("https://frontendmasters.com/") + fem_auth_mod = cookies.get('fem_auth_mod') + if fem_auth_mod: + headers['Cookie'] = f'fem_auth_mod={fem_auth_mod.value}' - formats = [] - for ext in ('webm', 'mp4'): - for quality in ('low', 'mid', 'high'): - resolution = self._QUALITIES[quality].copy() - format_id = '%s-%s' % (ext, quality) - format_url = self._download_json( - source_url, lesson_id, - 'Downloading %s source JSON' % format_id, query={ - 'f': ext, - 'r': resolution['height'], - }, headers={ - 'Referer': url, - }, fatal=False)['url'] + json = self._download_json( + source_url, + 'Downloading source JSON', query={ + 'f': 'm3u8' + }, headers=headers) - if not format_url: - continue + video_url = json.get('url') + + formats = 
self._extract_m3u8_formats(video_url, lesson_id) - f = resolution.copy() - f.update({ - 'url': format_url, - 'ext': ext, - 'format_id': format_id, - }) - formats.append(f) subtitles = { 'en': [{ - 'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id), + 'url': f'{self._API_BASE}/transcripts/{lesson_id}.vtt' }] } From 4fc802f965063d94dd69e0e5f39ed708f5e2b428 Mon Sep 17 00:00:00 2001 From: Conner Luker Date: Wed, 21 Feb 2024 23:26:30 -0500 Subject: [PATCH 2/4] flake8 --- yt_dlp/extractor/frontendmasters.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 8156a9429..37025bea9 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -19,7 +19,6 @@ class FrontendMastersBaseIE(InfoExtractor): _NETRC_MACHINE = 'frontendmasters' - def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -125,6 +124,7 @@ class FrontendMastersIE(FrontendMastersBaseIE): 'url': 'frontendmasters:a2qogef6ba', 'only_matching': True, }] + def _real_extract(self, url): lesson_id = self._match_id(url) @@ -137,16 +137,15 @@ def _real_extract(self, url): if fem_auth_mod: headers['Cookie'] = f'fem_auth_mod={fem_auth_mod.value}' - json = self._download_json( + json_response = self._download_json( source_url, 'Downloading source JSON', query={ 'f': 'm3u8' }, headers=headers) - video_url = json.get('url') - - formats = self._extract_m3u8_formats(video_url, lesson_id) + m3u8_url = json_response.get('url') + formats = self._extract_m3u8_formats(m3u8_url, lesson_id) subtitles = { 'en': [{ From d9ab6682231722922a298251122c5b1bb19c70b3 Mon Sep 17 00:00:00 2001 From: Conner Luker Date: Tue, 27 Feb 2024 12:45:48 -0500 Subject: [PATCH 3/4] fix captions --- yt_dlp/extractor/frontendmasters.py | 31 +++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git 
a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 37025bea9..373491ded 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -19,6 +19,21 @@ class FrontendMastersBaseIE(InfoExtractor): _NETRC_MACHINE = 'frontendmasters' + def _get_subtitles(self, lesson_data, course_data): + captions_base = "https://captions.frontendmasters.com/assets/courses/" + lesson_slug = lesson_data.get('slug') + lesson_index = lesson_data.get('index') + date_published = course_data.get('datePublished') + course_slug = course_data.get('slug') + + subtitles_url = f'{captions_base}{date_published}-{course_slug}/{lesson_index}-{lesson_slug}.vtt' + + return { + 'en': [{ + 'url': subtitles_url + }] + } + def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -69,7 +84,7 @@ def _extract_chapters(course): return chapters @staticmethod - def _extract_lesson(chapters, lesson_id, lesson): + def _extract_lesson(chapters, lesson_id, lesson, subtitles): title = lesson.get('title') or lesson_id display_id = lesson.get('slug') description = lesson.get('description') @@ -106,6 +121,7 @@ def _extract_lesson(chapters, lesson_id, lesson): 'duration': duration, 'chapter': chapter, 'chapter_number': chapter_number, + 'subtitles': subtitles } @@ -147,17 +163,10 @@ def _real_extract(self, url): formats = self._extract_m3u8_formats(m3u8_url, lesson_id) - subtitles = { - 'en': [{ - 'url': f'{self._API_BASE}/transcripts/{lesson_id}.vtt' - }] - } - return { 'id': lesson_id, 'title': lesson_id, 'formats': formats, - 'subtitles': subtitles } @@ -192,8 +201,9 @@ def _real_extract(self, url): for video_id, data in course['lessonData'].items() if data.get('slug') == lesson_name) + subtitles = self.extract_subtitles(lesson, course) chapters = self._extract_chapters(course) - return self._extract_lesson(chapters, lesson_id, lesson) + return self._extract_lesson(chapters, 
lesson_id, lesson, subtitles) class FrontendMastersCourseIE(FrontendMastersPageBaseIE): @@ -228,9 +238,10 @@ def _real_extract(self, url): for lesson in lessons: lesson_name = lesson.get('slug') lesson_id = lesson.get('hash') or lesson.get('statsId') + subtitles = self.extract_subtitles(lesson, course) if not lesson_id or not lesson_name: continue - entries.append(self._extract_lesson(chapters, lesson_id, lesson)) + entries.append(self._extract_lesson(chapters, lesson_id, lesson, subtitles)) title = course.get('title') description = course.get('description') From b68122d9aad5432fdb1c4778d1d482dd9058bffb Mon Sep 17 00:00:00 2001 From: Conner Luker Date: Sat, 3 Aug 2024 10:51:29 -0700 Subject: [PATCH 4/4] merge upstream --- yt_dlp/extractor/frontendmasters.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 373491ded..52faa028e 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, parse_duration, @@ -42,7 +39,7 @@ def _perform_login(self, username, password): login_form.update({ 'username': username, - 'password': password + 'password': password, }) post_url = self._search_regex( @@ -50,7 +47,7 @@ def _perform_login(self, username, password): 'post_url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', data=urlencode_postdata(login_form), @@ -65,14 +62,14 @@ def _perform_login(self, username, password): r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<', response, 'error message', default=None,
group='error') if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') class FrontendMastersPageBaseIE(FrontendMastersBaseIE): def _download_course(self, course_name, url): return self._download_json( - '%s/courses/%s' % (self._API_BASE, course_name), course_name, + f'{self._API_BASE}/courses/{course_name}', course_name, 'Downloading course JSON', headers={'Referer': url}) @staticmethod @@ -101,7 +98,7 @@ def _extract_lesson(chapters, lesson_id, lesson, subtitles): duration = None timestamp = lesson.get('timestamp') - if isinstance(timestamp, compat_str): + if isinstance(timestamp, str): mobj = re.search( r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})', timestamp) @@ -111,7 +108,7 @@ def _extract_lesson(chapters, lesson_id, lesson, subtitles): return { '_type': 'url_transparent', - 'url': 'frontendmasters:%s' % lesson_id, + 'url': f'frontendmasters:{lesson_id}', 'ie_key': FrontendMastersIE.ie_key(), 'id': lesson_id, 'display_id': display_id,