[pluralsight] Add support for subtitles (Closes #9681)

This commit is contained in:
Sergey M․ 2016-08-24 08:41:52 +07:00
parent c86f51ee38
commit 8c3e35dd44
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -1,9 +1,10 @@
from __future__ import unicode_literals
import re
import json
import random
import collections
import json
import os
import random
import re
from .common import InfoExtractor
from ..compat import (
@ -12,10 +13,12 @@
)
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_duration,
qualities,
sanitized_Request,
srt_subtitles_timecode,
urlencode_postdata,
)
@ -91,6 +94,51 @@ def _login(self):
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
raise ExtractorError('Unable to log in')
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
captions_post = {
'a': author,
'cn': clip_id,
'lc': lang,
'm': name,
}
captions = self._download_json(
'%s/training/Player/Captions' % self._API_BASE, video_id,
'Downloading captions JSON', 'Unable to download captions JSON',
fatal=False, data=json.dumps(captions_post).encode('utf-8'),
headers={'Content-Type': 'application/json;charset=utf-8'})
if captions:
return {
lang: [{
'ext': 'json',
'data': json.dumps(captions),
}, {
'ext': 'srt',
'data': self._convert_subtitles(duration, captions),
}]
}
@staticmethod
def _convert_subtitles(duration, subs):
srt = ''
for num, current in enumerate(subs):
current = subs[num]
start, text = float_or_none(
current.get('DisplayTimeOffset')), current.get('Text')
if start is None or text is None:
continue
end = duration if num == len(subs) - 1 else float_or_none(
subs[num + 1].get('DisplayTimeOffset'))
srt += os.linesep.join(
(
'%d' % num,
'%s --> %s' % (
srt_subtitles_timecode(start),
srt_subtitles_timecode(end)),
text,
os.linesep,
))
return srt
def _real_extract(self, url):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
@ -138,6 +186,8 @@ def _real_extract(self, url):
if not clip:
raise ExtractorError('Unable to resolve clip')
title = '%s - %s' % (module['title'], clip['title'])
QUALITIES = {
'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640},
@ -225,18 +275,20 @@ def guess_allowed_qualities():
formats.append(f)
self._sort_formats(formats)
# TODO: captions
# http://www.pluralsight.com/training/Player/ViewClip + cap = true
# or
# http://www.pluralsight.com/training/Player/Captions
# { a = author, cn = clip_id, lc = end, m = name }
duration = int_or_none(
clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
# TODO: other languages?
subtitles = self.extract_subtitles(
author, clip_id, 'en', name, duration, display_id)
return {
'id': clip.get('clipName') or clip['name'],
'title': '%s - %s' % (module['title'], clip['title']),
'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
'title': title,
'duration': duration,
'creator': author,
'formats': formats
'formats': formats,
'subtitles': subtitles,
}