From 9d52bf65ff38386a70493ce152f0883476b0709b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elan=20Ruusam=C3=A4e?= <glen@pld-linux.org>
Date: Tue, 22 Nov 2022 20:09:57 +0200
Subject: [PATCH] [extractor/kanal2] Add extractor (#5575)

Authored by: glensc, pukkandan, bashonly
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/kanal2.py      | 66 +++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 yt_dlp/extractor/kanal2.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 375ac0d06..9d5af491b 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -820,6 +820,7 @@
 from .jwplatform import JWPlatformIE
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
+from .kanal2 import Kanal2IE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
diff --git a/yt_dlp/extractor/kanal2.py b/yt_dlp/extractor/kanal2.py
new file mode 100644
index 000000000..3c0efe598
--- /dev/null
+++ b/yt_dlp/extractor/kanal2.py
@@ -0,0 +1,66 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    join_nonempty,
+    traverse_obj,
+    unified_timestamp,
+    update_url_query,
+)
+
+
+class Kanal2IE(InfoExtractor):  # extractor for kanal2.postimees.ee pages that carry a numeric ``id`` query parameter
+    _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'  # optional group lets other query parameters precede id=
+    _TESTS = [{
+        'note': 'Test standard url (#5575)',
+        'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
+        'md5': '7ea7b16266ec1798743777df241883dd',
+        'info_dict': {
+            'id': '40792',
+            'ext': 'mp4',
+            'title': 'Aedniku aabits / Osa 53  (05.08.2016 20:00)',
+            'thumbnail': r're:https?://.*\.jpg$',
+            'description': 'md5:53cabf3c5d73150d594747f727431248',
+            'upload_date': '20160805',
+            'timestamp': 1470420000,
+        },
+    }]
+
+    def _real_extract(self, url):  # builds the info dict for one video from its playlist JSON
+        video_id = self._match_id(url)
+        playlist = self._download_json(  # the response's 'info' and 'data' objects are the only parts read below
+            f'https://kanal2.postimees.ee/player/playlist/{video_id}',
+            video_id, query={'type': 'episodes'},
+            headers={'X-Requested-With': 'XMLHttpRequest'})  # NOTE(review): presumably the endpoint only answers AJAX-style requests - confirm
+
+        return {
+            'id': video_id,
+            'title': join_nonempty(*traverse_obj(playlist, ('info', ('title', 'subtitle'))), delim=' / '),  # info.title and info.subtitle joined with ' / '
+            'description': traverse_obj(playlist, ('info', 'description')),
+            'thumbnail': traverse_obj(playlist, ('data', 'image')),
+            'formats': self.get_formats(playlist, video_id),  # performs a second request to register a streaming session
+            'timestamp': unified_timestamp(self._search_regex(  # date is taken from a trailing "(DD.MM.YYYY HH:MM)" in info.subtitle
+                r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
+                traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='') + ' +0200'),  # NOTE(review): fixed +0200 offset ignores summer time (EEST is +0300) - confirm intended
+        }
+
+    def get_formats(self, playlist, video_id):  # returns the m3u8 formats of every stream listed in playlist['data']['streams']
+        path = traverse_obj(playlist, ('data', 'path'))
+        if not path:  # without data.path no session can be requested, so extraction is aborted
+            raise ExtractorError('Path value not found in playlist JSON response')
+        session = self._download_json(  # the returned 'session' value is appended to each stream URL below
+            'https://sts.postimees.ee/session/register',
+            video_id, note='Creating session', errnote='Error creating session',
+            headers={
+                'X-Original-URI': path,  # data.path from the playlist is handed to the session service in this header
+                'Accept': 'application/json',
+            })
+        if session.get('reason') != 'OK' or not session.get('session'):  # success requires reason == 'OK' and a non-empty session value
+            reason = session.get('reason', 'unknown error')
+            raise ExtractorError(f'Unable to obtain session: {reason}')
+
+        formats = []
+        for stream in traverse_obj(playlist, ('data', 'streams', ..., 'file')):  # one manifest URL per entry under data.streams[*].file
+            formats.extend(self._extract_m3u8_formats(
+                update_url_query(stream, {'s': session['session']}), video_id, 'mp4'))  # session value goes into the 's' query parameter
+
+        return formats