diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index b5176aa9b6..52faa028e1 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -16,11 +16,20 @@ class FrontendMastersBaseIE(InfoExtractor): _NETRC_MACHINE = 'frontendmasters' - _QUALITIES = { - 'low': {'width': 480, 'height': 360}, - 'mid': {'width': 1280, 'height': 720}, - 'high': {'width': 1920, 'height': 1080}, - } + def _get_subtitles(self, lesson_data, course_data): + captions_base = "https://captions.frontendmasters.com/assets/courses/" + lesson_slug = lesson_data.get('slug') + lesson_index = lesson_data.get('index') + date_published = course_data.get('datePublished') + course_slug = course_data.get('slug') + + subtitles_url = f'{captions_base}{date_published}-{course_slug}/{lesson_index}-{lesson_slug}.vtt' + + return { + 'en': [{ + 'url': subtitles_url + }] + } def _perform_login(self, username, password): login_page = self._download_webpage( @@ -72,7 +81,7 @@ def _extract_chapters(course): return chapters @staticmethod - def _extract_lesson(chapters, lesson_id, lesson): + def _extract_lesson(chapters, lesson_id, lesson, subtitles): title = lesson.get('title') or lesson_id display_id = lesson.get('slug') description = lesson.get('description') @@ -109,6 +118,7 @@ def _extract_lesson(chapters, lesson_id, lesson): 'duration': duration, 'chapter': chapter, 'chapter_number': chapter_number, + 'subtitles': subtitles } @@ -132,43 +142,28 @@ def _real_extract(self, url): lesson_id = self._match_id(url) source_url = f'{self._API_BASE}/video/{lesson_id}/source' - - formats = [] - for ext in ('webm', 'mp4'): - for quality in ('low', 'mid', 'high'): - resolution = self._QUALITIES[quality].copy() - format_id = f'{ext}-{quality}' - format_url = self._download_json( - source_url, lesson_id, - f'Downloading {format_id} source JSON', query={ - 'f': ext, - 'r': resolution['height'], - }, headers={ - 'Referer': url, - }, fatal=False)['url'] - - if not format_url: - continue - - f = resolution.copy() - f.update({ - 'url': format_url, - 'ext': ext, - 'format_id': format_id, - }) - formats.append(f) - - subtitles = { - 'en': [{ - 'url': f'{self._API_BASE}/transcripts/{lesson_id}.vtt', - }], + headers = { + 'Referer': 'https://frontendmasters.com/', } + cookies = self._get_cookies("https://frontendmasters.com/") + fem_auth_mod = cookies.get('fem_auth_mod') + if fem_auth_mod: + headers['Cookie'] = f'fem_auth_mod={fem_auth_mod.value}' + + json_response = self._download_json( + source_url, + 'Downloading source JSON', query={ + 'f': 'm3u8' + }, headers=headers) + + m3u8_url = json_response.get('url') + + formats = self._extract_m3u8_formats(m3u8_url, lesson_id) return { 'id': lesson_id, 'title': lesson_id, 'formats': formats, - 'subtitles': subtitles, } @@ -203,8 +198,9 @@ def _real_extract(self, url): for video_id, data in course['lessonData'].items() if data.get('slug') == lesson_name) + subtitles = self.extract_subtitles(lesson, course) chapters = self._extract_chapters(course) - return self._extract_lesson(chapters, lesson_id, lesson) + return self._extract_lesson(chapters, lesson_id, lesson, subtitles) class FrontendMastersCourseIE(FrontendMastersPageBaseIE): @@ -239,9 +235,10 @@ def _real_extract(self, url): for lesson in lessons: lesson_name = lesson.get('slug') lesson_id = lesson.get('hash') or lesson.get('statsId') + subtitles = self.extract_subtitles(lesson, course) if not lesson_id or not lesson_name: continue - entries.append(self._extract_lesson(chapters, lesson_id, lesson)) + entries.append(self._extract_lesson(chapters, lesson_id, lesson, subtitles)) title = course.get('title') description = course.get('description')