From 2e5d60b7db7020b726cd54ee4cad8f2afbd1479d Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Thu, 21 Feb 2013 20:51:35 +0100 Subject: [PATCH 01/11] Removed conversion from youtube closed caption format to srt since youtube api supports the 'srt' format --- test/test_youtube_subtitles.py | 4 ++-- youtube_dl/InfoExtractors.py | 24 ++++-------------------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 5d3566a35f..ff09ea459c 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -43,7 +43,7 @@ def test_youtube_subtitles(self): DL.params['writesubtitles'] = True IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - self.assertEqual(md5(info_dict[0]['subtitles']), 'c3228550d59116f3c29fba370b55d033') + self.assertEqual(md5(info_dict[0]['subtitles']), '4cd9278a35ba2305f47354ee13472260') def test_youtube_subtitles_it(self): DL = FakeDownloader() @@ -51,7 +51,7 @@ def test_youtube_subtitles_it(self): DL.params['subtitleslang'] = 'it' IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - self.assertEqual(md5(info_dict[0]['subtitles']), '132a88a0daf8e1520f393eb58f1f646a') + self.assertEqual(md5(info_dict[0]['subtitles']), '164a51f16f260476a05b50fe4c2f161d') if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index d3c3ac2640..e3998fbe88 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -228,23 +228,6 @@ def report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') - def _closed_captions_xml_to_srt(self, xml_string): - srt = '' - texts = re.findall(r'([^<]+)', xml_string, re.MULTILINE) - # TODO parse xml instead of regex - for n, (start, dur_tag, dur, caption) in enumerate(texts): - if not dur: dur = '4' - start = float(start) - end = start + float(dur) - start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000) - end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000) - caption = unescapeHTML(caption) - caption = unescapeHTML(caption) # double cycle, intentional - srt += str(n+1) + '\n' - srt += start + ' --> ' + end + '\n' - srt += caption + '\n\n' - return srt - def _extract_subtitles(self, video_id): self.report_video_subtitles_download(video_id) request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) @@ -268,15 +251,16 @@ def _extract_subtitles(self, video_id): 'lang': srt_lang, 'name': srt_lang_list[srt_lang].encode('utf-8'), 'v': video_id, + 'fmt': 'srt', }) url = 'http://www.youtube.com/api/timedtext?' + params try: - srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8') + srt = compat_urllib_request.urlopen(url).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) - if not srt_xml: + if not srt: return (u'WARNING: Did not fetch video subtitles', None) - return (None, self._closed_captions_xml_to_srt(srt_xml)) + return (None, srt) def _print_formats(self, formats): print('Available formats:') From cdb130b09a16865b81fd34d19b74fa634d45cad7 Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Thu, 21 Feb 2013 22:12:36 +0100 Subject: [PATCH 02/11] Added new option '--only-srt' to download only the subtitles of a video Improved option '--srt-lang' - it shows the argument in case of missing subtitles - added language suffix for non-english languages (e.g. video.it.srt) --- test/test_youtube_subtitles.py | 7 +++++++ youtube_dl/FileDownloader.py | 5 +++++ youtube_dl/InfoExtractors.py | 7 ++++++- youtube_dl/__init__.py | 4 ++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index ff09ea459c..77c275b75f 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -53,5 +53,12 @@ def test_youtube_subtitles_it(self): info_dict = IE.extract('QRS8MkLhQmM') self.assertEqual(md5(info_dict[0]['subtitles']), '164a51f16f260476a05b50fe4c2f161d') + def test_youtube_onlysubtitles(self): + DL = FakeDownloader() + DL.params['onlysubtitles'] = True + IE = YoutubeIE(DL) + info_dict = IE.extract('QRS8MkLhQmM') + self.assertEqual(md5(info_dict[0]['subtitles']), '4cd9278a35ba2305f47354ee13472260') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 53c2d1dce0..487c9dadbf 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -79,6 +79,7 @@ class FileDownloader(object): writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file writesubtitles: Write the video subtitles to a .srt file + onlysubtitles: Downloads only the subtitles of the video subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. keepvideo: Keep the video file after post-processing @@ -443,9 +444,13 @@ def process_info(self, info_dict): # that way it will silently go on when used with unsupporting IE try: srtfn = filename.rsplit('.', 1)[0] + u'.srt' + if self.params.get('subtitleslang', False): + srtfn = filename.rsplit('.', 1)[0] + u'.' + self.params['subtitleslang'] + u'.srt' self.report_writesubtitles(srtfn) with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: srtfile.write(info_dict['subtitles']) + if self.params.get('onlysubtitles', False): + return except (OSError, IOError): self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) return diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index e3998fbe88..51b263383d 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -228,6 +228,7 @@ def report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') + def _extract_subtitles(self, video_id): self.report_video_subtitles_download(video_id) request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) @@ -246,7 +247,7 @@ def _extract_subtitles(self, video_id): else: srt_lang = list(srt_lang_list.keys())[0] if not srt_lang in srt_lang_list: - return (u'WARNING: no closed captions found in the specified language', None) + return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) params = compat_urllib_parse.urlencode({ 'lang': srt_lang, 'name': srt_lang_list[srt_lang].encode('utf-8'), @@ -483,6 +484,10 @@ def _real_extract(self, url): # closed captions video_subtitles = None + if self._downloader.params.get('subtitleslang', False): + self._downloader.params['writesubtitles'] = True + if self._downloader.params.get('onlysubtitles', False): + self._downloader.params['writesubtitles'] = True if self._downloader.params.get('writesubtitles', False): (srt_error, video_subtitles) = self._extract_subtitles(video_id) if srt_error: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 23e3c2ac29..ababeac872 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -176,6 +176,9 @@ def _find_term_columns(): video_format.add_option('--write-srt', action='store_true', dest='writesubtitles', help='write video closed captions to a .srt file (currently youtube only)', default=False) + video_format.add_option('--only-srt', + action='store_true', dest='onlysubtitles', + help='downloads only the subtitles of the video (currently youtube only)', default=False) video_format.add_option('--srt-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the closed captions to download (optional) use IETF language tags like \'en\'') @@ -450,6 +453,7 @@ def _real_main(): 'writedescription': opts.writedescription, 'writeinfojson': opts.writeinfojson, 'writesubtitles': opts.writesubtitles, + 'onlysubtitles': opts.onlysubtitles, 'subtitleslang': opts.subtitleslang, 'matchtitle': decodeOption(opts.matchtitle), 'rejecttitle': decodeOption(opts.rejecttitle), From ae608b8076497d70e2a95e5e939c1fb31e2dde53 Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Fri, 22 Feb 2013 02:52:55 +0100 Subject: [PATCH 03/11] Added new option '--all-srt' to download all the subtitles of a video. Only works in youtube for the moment. --- test/parameters.json | 6 ++- test/test_youtube_subtitles.py | 31 ++++++++++++--- youtube_dl/FileDownloader.py | 28 ++++++++++--- youtube_dl/InfoExtractors.py | 73 ++++++++++++++++++++++++---------- youtube_dl/__init__.py | 4 ++ 5 files changed, 107 insertions(+), 35 deletions(-) diff --git a/test/parameters.json b/test/parameters.json index 8215d25c5c..0d4bd644cf 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -36,5 +36,7 @@ "verbose": true, "writedescription": false, "writeinfojson": true, - "writesubtitles": false -} \ No newline at end of file + "writesubtitles": false, + "onlysubtitles": false, + "allsubtitles": false +} diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 77c275b75f..3b5a53fca1 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -38,27 +38,48 @@ def download(self, x): md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class TestYoutubeSubtitles(unittest.TestCase): + def setUp(self): + DL = FakeDownloader() + DL.params['allsubtitles'] = False + DL.params['writesubtitles'] = False + + def test_youtube_no_subtitles(self): + DL = FakeDownloader() + DL.params['writesubtitles'] = False + IE = YoutubeIE(DL) + info_dict = IE.extract('QRS8MkLhQmM') + subtitles = info_dict[0]['subtitles'] + self.assertEqual(subtitles, None) def test_youtube_subtitles(self): DL = FakeDownloader() DL.params['writesubtitles'] = True IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - self.assertEqual(md5(info_dict[0]['subtitles']), '4cd9278a35ba2305f47354ee13472260') - + sub = info_dict[0]['subtitles'][0] + self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') def test_youtube_subtitles_it(self): DL = FakeDownloader() DL.params['writesubtitles'] = True DL.params['subtitleslang'] = 'it' IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - self.assertEqual(md5(info_dict[0]['subtitles']), '164a51f16f260476a05b50fe4c2f161d') - + sub = info_dict[0]['subtitles'][0] + self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d') def test_youtube_onlysubtitles(self): DL = FakeDownloader() + DL.params['writesubtitles'] = True DL.params['onlysubtitles'] = True IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') - self.assertEqual(md5(info_dict[0]['subtitles']), '4cd9278a35ba2305f47354ee13472260') + sub = info_dict[0]['subtitles'][0] + self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') + def test_youtube_allsubtitles(self): + DL = FakeDownloader() + DL.params['allsubtitles'] = True + IE = YoutubeIE(DL) + info_dict = IE.extract('QRS8MkLhQmM') + subtitles = info_dict[0]['subtitles'] + self.assertEqual(len(subtitles), 12) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 487c9dadbf..e496b8a8de 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -80,6 +80,7 @@ class FileDownloader(object): writeinfojson: Write the video description to a .info.json file writesubtitles: Write the video subtitles to a .srt file onlysubtitles: Downloads only the subtitles of the video + allsubtitles: Downloads all the subtitles of the video subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. keepvideo: Keep the video file after post-processing @@ -442,18 +443,33 @@ def process_info(self, info_dict): if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE + subtitle = info_dict['subtitles'][0] + (srt_error, srt_lang, srt) = subtitle try: - srtfn = filename.rsplit('.', 1)[0] + u'.srt' - if self.params.get('subtitleslang', False): - srtfn = filename.rsplit('.', 1)[0] + u'.' + self.params['subtitleslang'] + u'.srt' + srtfn = filename.rsplit('.', 1)[0] + u'.' + srt_lang + u'.srt' self.report_writesubtitles(srtfn) with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: - srtfile.write(info_dict['subtitles']) - if self.params.get('onlysubtitles', False): - return + srtfile.write(srt) except (OSError, IOError): self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) return + if self.params.get('onlysubtitles', False): + return + + if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: + subtitles = info_dict['subtitles'] + for subtitle in subtitles: + (srt_error, srt_lang, srt) = subtitle + try: + srtfn = filename.rsplit('.', 1)[0] + u'.' + srt_lang + u'.srt' + self.report_writesubtitles(srtfn) + with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: + srtfile.write(srt) + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) + return + if self.params.get('onlysubtitles', False): + return if self.params.get('writeinfojson', False): infofn = filename + u'.info.json' diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 51b263383d..a220de80a4 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -216,6 +216,10 @@ def report_video_subtitles_download(self, video_id): """Report attempt to download video info webpage.""" self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id) + def report_video_subtitles_request(self, video_id, lang): + """Report attempt to download video info webpage.""" + self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for lang: %s' % (video_id,lang)) + def report_information_extraction(self, video_id): """Report attempt to extract video information.""" self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) @@ -228,9 +232,7 @@ def report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') - - def _extract_subtitles(self, video_id): - self.report_video_subtitles_download(video_id) + def _get_available_subtitles(self, video_id): request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) try: srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8') @@ -240,19 +242,15 @@ def _extract_subtitles(self, video_id): srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) if not srt_lang_list: return (u'WARNING: video has no closed captions', None) - if self._downloader.params.get('subtitleslang', False): - srt_lang = self._downloader.params.get('subtitleslang') - elif 'en' in srt_lang_list: - srt_lang = 'en' - else: - srt_lang = list(srt_lang_list.keys())[0] - if not srt_lang in srt_lang_list: - return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) + return srt_lang_list + + def _request_subtitle(self, str_lang, str_name, video_id, format = 'srt'): + self.report_video_subtitles_request(video_id, str_lang) params = compat_urllib_parse.urlencode({ - 'lang': srt_lang, - 'name': srt_lang_list[srt_lang].encode('utf-8'), + 'lang': str_lang, + 'name': str_name, 'v': video_id, - 'fmt': 'srt', + 'fmt': format, }) url = 'http://www.youtube.com/api/timedtext?' + params try: @@ -261,7 +259,32 @@ def _extract_subtitles(self, video_id): return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) if not srt: return (u'WARNING: Did not fetch video subtitles', None) - return (None, srt) + return (None, str_lang, srt) + + def _extract_subtitle(self, video_id): + self.report_video_subtitles_download(video_id) + srt_lang_list = self._get_available_subtitles(video_id) + + if self._downloader.params.get('subtitleslang', False): + srt_lang = self._downloader.params.get('subtitleslang') + elif 'en' in srt_lang_list: + srt_lang = 'en' + else: + srt_lang = list(srt_lang_list.keys())[0] + if not srt_lang in srt_lang_list: + return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) + + sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) + return [sub] + + def _extract_all_subtitles(self, video_id): + self.report_video_subtitles_download(video_id) + srt_lang_list = self._get_available_subtitles(video_id) + subs = [] + for srt_lang in srt_lang_list: + sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) + subs.append(sub) + return subs def _print_formats(self, formats): print('Available formats:') @@ -484,14 +507,20 @@ def _real_extract(self, url): # closed captions video_subtitles = None - if self._downloader.params.get('subtitleslang', False): - self._downloader.params['writesubtitles'] = True - if self._downloader.params.get('onlysubtitles', False): - self._downloader.params['writesubtitles'] = True + if self._downloader.params.get('writesubtitles', False): - (srt_error, video_subtitles) = self._extract_subtitles(video_id) - if srt_error: - self._downloader.trouble(srt_error) + video_subtitles = self._extract_subtitle(video_id) + if video_subtitles: + (srt_error, srt_lang, srt) = video_subtitles[0] + if srt_error: + self._downloader.trouble(srt_error) + + if self._downloader.params.get('allsubtitles', False): + video_subtitles = self._extract_all_subtitles(video_id) + for video_subtitle in video_subtitles: + (srt_error, srt_lang, srt) = video_subtitle + if srt_error: + self._downloader.trouble(srt_error) if 'length_seconds' not in video_info: self._downloader.trouble(u'WARNING: unable to extract video duration') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ababeac872..20a22a4d11 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -179,6 +179,9 @@ def _find_term_columns(): video_format.add_option('--only-srt', action='store_true', dest='onlysubtitles', help='downloads only the subtitles of the video (currently youtube only)', default=False) + video_format.add_option('--all-srt', + action='store_true', dest='allsubtitles', + help='downloads all the available subtitles of the video (currently youtube only)', default=False) video_format.add_option('--srt-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the closed captions to download (optional) use IETF language tags like \'en\'') @@ -454,6 +457,7 @@ def _real_main(): 'writeinfojson': opts.writeinfojson, 'writesubtitles': opts.writesubtitles, 'onlysubtitles': opts.onlysubtitles, + 'allsubtitles': opts.allsubtitles, 'subtitleslang': opts.subtitleslang, 'matchtitle': decodeOption(opts.matchtitle), 'rejecttitle': decodeOption(opts.rejecttitle), From 553d097442ad5ee62d227de2e2703a2377dcf40f Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Fri, 22 Feb 2013 03:13:28 +0100 Subject: [PATCH 04/11] Refactor subtitle options from srt to the more generic 'sub'. In order to be more consistent with different subtitle formats. From: * --write-srt to --write-sub * --only-srt to --only-sub * --all-srt to --all-subs * --srt-lang to --sub-lang' Refactored also all the mentions of srt for sub in all the source code. --- youtube_dl/FileDownloader.py | 26 +++++++------- youtube_dl/InfoExtractors.py | 68 ++++++++++++++++++------------------ youtube_dl/__init__.py | 14 ++++---- 3 files changed, 54 insertions(+), 54 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index e496b8a8de..4549dd4648 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -78,7 +78,7 @@ class FileDownloader(object): updatetime: Use the Last-modified header to set output file timestamps. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file + writesubtitles: Write the video subtitles to a file (default=srt) onlysubtitles: Downloads only the subtitles of the video allsubtitles: Downloads all the subtitles of the video subtitleslang: Language of the subtitles to download @@ -291,9 +291,9 @@ def report_writedescription(self, descfn): """ Report that the description file is being written """ self.to_screen(u'[info] Writing video description to: ' + descfn) - def report_writesubtitles(self, srtfn): + def report_writesubtitles(self, sub_filename): """ Report that the subtitles file is being written """ - self.to_screen(u'[info] Writing video subtitles to: ' + srtfn) + self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename) def report_writeinfojson(self, infofn): """ Report that the metadata file has been written """ @@ -444,12 +444,12 @@ def process_info(self, info_dict): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitle = info_dict['subtitles'][0] - (srt_error, srt_lang, srt) = subtitle + (sub_error, sub_lang, sub) = subtitle try: - srtfn = filename.rsplit('.', 1)[0] + u'.' + srt_lang + u'.srt' - self.report_writesubtitles(srtfn) - with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: - srtfile.write(srt) + sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt' + self.report_writesubtitles(sub_filename) + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: + subfile.write(sub) except (OSError, IOError): self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) return @@ -459,12 +459,12 @@ def process_info(self, info_dict): if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: subtitles = info_dict['subtitles'] for subtitle in subtitles: - (srt_error, srt_lang, srt) = subtitle + (sub_error, sub_lang, sub) = subtitle try: - srtfn = filename.rsplit('.', 1)[0] + u'.' + srt_lang + u'.srt' - self.report_writesubtitles(srtfn) - with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: - srtfile.write(srt) + sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt' + self.report_writesubtitles(sub_filename) + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: + subfile.write(sub) except (OSError, IOError): self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) return diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index a220de80a4..e078bb083f 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -47,7 +47,7 @@ class InfoExtractor(object): uploader_id: Nickname or id of the video uploader. location: Physical location of the video. player_url: SWF Player URL (used for rtmpdump). - subtitles: The .srt file contents. + subtitles: The subtitle file contents. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen @@ -235,56 +235,56 @@ def report_rtmp_download(self): def _get_available_subtitles(self, video_id): request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) try: - srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8') + sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) - srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) - srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) - if not srt_lang_list: + sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) + sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) + if not sub_lang_list: return (u'WARNING: video has no closed captions', None) - return srt_lang_list + return sub_lang_list - def _request_subtitle(self, str_lang, str_name, video_id, format = 'srt'): - self.report_video_subtitles_request(video_id, str_lang) + def _request_subtitle(self, sub_lang, sub_name, video_id, format = 'srt'): + self.report_video_subtitles_request(video_id, sub_lang) params = compat_urllib_parse.urlencode({ - 'lang': str_lang, - 'name': str_name, + 'lang': sub_lang, + 'name': sub_name, 'v': video_id, 'fmt': format, }) url = 'http://www.youtube.com/api/timedtext?' + params try: - srt = compat_urllib_request.urlopen(url).read().decode('utf-8') + sub = compat_urllib_request.urlopen(url).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) - if not srt: + if not sub: return (u'WARNING: Did not fetch video subtitles', None) - return (None, str_lang, srt) + return (None, sub_lang, sub) def _extract_subtitle(self, video_id): self.report_video_subtitles_download(video_id) - srt_lang_list = self._get_available_subtitles(video_id) + sub_lang_list = self._get_available_subtitles(video_id) if self._downloader.params.get('subtitleslang', False): - srt_lang = self._downloader.params.get('subtitleslang') - elif 'en' in srt_lang_list: - srt_lang = 'en' + sub_lang = self._downloader.params.get('subtitleslang') + elif 'en' in sub_lang_list: + sub_lang = 'en' else: - srt_lang = list(srt_lang_list.keys())[0] - if not srt_lang in srt_lang_list: - return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) + sub_lang = list(sub_lang_list.keys())[0] + if not sub_lang in sub_lang_list: + return (u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None) - sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) - return [sub] + subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id) + return [subtitle] def _extract_all_subtitles(self, video_id): self.report_video_subtitles_download(video_id) - srt_lang_list = self._get_available_subtitles(video_id) - subs = [] - for srt_lang in srt_lang_list: - sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) - subs.append(sub) - return subs + sub_lang_list = self._get_available_subtitles(video_id) + subtitles = [] + for sub_lang in sub_lang_list: + subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id) + subtitles.append(subtitle) + return subtitles def _print_formats(self, formats): print('Available formats:') @@ -511,16 +511,16 @@ def _real_extract(self, url): if self._downloader.params.get('writesubtitles', False): video_subtitles = self._extract_subtitle(video_id) if video_subtitles: - (srt_error, srt_lang, srt) = video_subtitles[0] - if srt_error: - self._downloader.trouble(srt_error) + (sub_error, sub_lang, sub) = video_subtitles[0] + if sub_error: + self._downloader.trouble(sub_error) if self._downloader.params.get('allsubtitles', False): video_subtitles = self._extract_all_subtitles(video_id) for video_subtitle in video_subtitles: - (srt_error, srt_lang, srt) = video_subtitle - if srt_error: - self._downloader.trouble(srt_error) + (sub_error, sub_lang, sub) = video_subtitle + if sub_error: + self._downloader.trouble(sub_error) if 'length_seconds' not in video_info: self._downloader.trouble(u'WARNING: unable to extract video duration') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 20a22a4d11..495b5ac41f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -173,18 +173,18 @@ def _find_term_columns(): action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') video_format.add_option('-F', '--list-formats', action='store_true', dest='listformats', help='list all available formats (currently youtube only)') - video_format.add_option('--write-srt', + video_format.add_option('--write-sub', action='store_true', dest='writesubtitles', - help='write video closed captions to a .srt file (currently youtube only)', default=False) - video_format.add_option('--only-srt', + help='write subtitle file (currently youtube only)', default=False) + video_format.add_option('--only-sub', action='store_true', dest='onlysubtitles', - help='downloads only the subtitles of the video (currently youtube only)', default=False) - video_format.add_option('--all-srt', + help='downloads only the subtitles (no video)', default=False) + video_format.add_option('--all-subs', action='store_true', dest='allsubtitles', help='downloads all the available subtitles of the video (currently youtube only)', default=False) - video_format.add_option('--srt-lang', + video_format.add_option('--sub-lang', action='store', dest='subtitleslang', metavar='LANG', - help='language of the closed captions to download (optional) use IETF language tags like \'en\'') + help='language of the subtitles to download (optional) use IETF language tags like \'en\'') verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) From 9e62bc443996c1950de0841997c76d110cb77c6e Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Fri, 22 Feb 2013 03:53:54 +0100 Subject: [PATCH 05/11] Added new option '--sub-format' to choose the format of the subtitles to downloade (defaut=srt) --- test/parameters.json | 1 + test/test_youtube_subtitles.py | 10 +++++++++- youtube_dl/FileDownloader.py | 9 ++++++--- youtube_dl/InfoExtractors.py | 11 ++++++----- youtube_dl/__init__.py | 4 ++++ 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/test/parameters.json b/test/parameters.json index 0d4bd644cf..750b1c96e1 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -29,6 +29,7 @@ "simulate": false, "skip_download": false, "subtitleslang": null, + "subtitlesformat": "srt", "test": true, "updatetime": true, "usenetrc": false, diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 3b5a53fca1..94adc45552 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -42,7 +42,7 @@ def setUp(self): DL = FakeDownloader() DL.params['allsubtitles'] = False DL.params['writesubtitles'] = False - + DL.params['subtitlesformat'] = 'srt' def test_youtube_no_subtitles(self): DL = FakeDownloader() DL.params['writesubtitles'] = False @@ -80,6 +80,14 @@ def test_youtube_allsubtitles(self): info_dict = IE.extract('QRS8MkLhQmM') subtitles = info_dict[0]['subtitles'] self.assertEqual(len(subtitles), 12) + def test_youtube_subtitles_format(self): + DL = FakeDownloader() + DL.params['writesubtitles'] = True + DL.params['subtitlesformat'] = 'sbv' + IE = YoutubeIE(DL) + info_dict = IE.extract('QRS8MkLhQmM') + sub = info_dict[0]['subtitles'][0] + self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 4549dd4648..a041e12199 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -78,9 +78,10 @@ class FileDownloader(object): updatetime: Use the Last-modified header to set output file timestamps. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a file (default=srt) + writesubtitles: Write the video subtitles to a file onlysubtitles: Downloads only the subtitles of the video allsubtitles: Downloads all the subtitles of the video + subtitlesformat: Subtitle format [sbv/srt] (default=srt) subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. keepvideo: Keep the video file after post-processing @@ -445,8 +446,9 @@ def process_info(self, info_dict): # that way it will silently go on when used with unsupporting IE subtitle = info_dict['subtitles'][0] (sub_error, sub_lang, sub) = subtitle + sub_format = self.params.get('subtitlesformat') try: - sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt' + sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format self.report_writesubtitles(sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: subfile.write(sub) @@ -458,10 +460,11 @@ def process_info(self, info_dict): if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: subtitles = info_dict['subtitles'] + sub_format = self.params.get('subtitlesformat') for subtitle in subtitles: (sub_error, sub_lang, sub) = subtitle try: - sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.srt' + sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format self.report_writesubtitles(sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: subfile.write(sub) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index e078bb083f..62522bb6cf 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -244,7 +244,7 @@ def _get_available_subtitles(self, video_id): return (u'WARNING: video has no closed captions', None) return sub_lang_list - def _request_subtitle(self, sub_lang, sub_name, video_id, format = 'srt'): + def _request_subtitle(self, sub_lang, sub_name, video_id, format): self.report_video_subtitles_request(video_id, sub_lang) params = compat_urllib_parse.urlencode({ 'lang': sub_lang, @@ -264,7 +264,7 @@ def _request_subtitle(self, sub_lang, sub_name, video_id, format = 'srt'): def _extract_subtitle(self, video_id): self.report_video_subtitles_download(video_id) sub_lang_list = self._get_available_subtitles(video_id) - + sub_format = self._downloader.params.get('subtitlesformat') if self._downloader.params.get('subtitleslang', False): sub_lang = self._downloader.params.get('subtitleslang') elif 'en' in sub_lang_list: @@ -274,15 +274,16 @@ def _extract_subtitle(self, video_id): if not sub_lang in sub_lang_list: return (u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None) - subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id) + subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) return [subtitle] def _extract_all_subtitles(self, video_id): self.report_video_subtitles_download(video_id) sub_lang_list = self._get_available_subtitles(video_id) + sub_format = self._downloader.params.get('subtitlesformat') subtitles = [] for sub_lang in sub_lang_list: - subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id) + subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) subtitles.append(subtitle) return subtitles @@ -505,7 +506,7 @@ def _real_extract(self, url): else: video_description = '' - # closed captions + # subtitles video_subtitles = None if self._downloader.params.get('writesubtitles', False): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 495b5ac41f..914d030a37 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -182,6 +182,9 @@ def _find_term_columns(): video_format.add_option('--all-subs', action='store_true', dest='allsubtitles', help='downloads all the available subtitles of the video (currently youtube only)', default=False) + video_format.add_option('--sub-format', + action='store', dest='subtitlesformat', metavar='LANG', + help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') video_format.add_option('--sub-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') @@ -458,6 +461,7 @@ def _real_main(): 'writesubtitles': opts.writesubtitles, 'onlysubtitles': opts.onlysubtitles, 'allsubtitles': opts.allsubtitles, + 'subtitlesformat': opts.subtitlesformat, 'subtitleslang': opts.subtitleslang, 'matchtitle': decodeOption(opts.matchtitle), 'rejecttitle': decodeOption(opts.rejecttitle), From 2a4093eaf3af07fa0a74926ce09cb49aba73017e Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Fri, 22 Feb 2013 04:50:05 +0100 Subject: [PATCH 06/11] Added new option '--list-subs' to show the available subtitle languages --- test/parameters.json | 3 ++- test/test_youtube_subtitles.py | 7 +++++++ youtube_dl/FileDownloader.py | 1 + youtube_dl/InfoExtractors.py | 26 +++++++++++++++++++------- youtube_dl/__init__.py | 4 ++++ 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/test/parameters.json b/test/parameters.json index 750b1c96e1..96998b5c39 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -39,5 +39,6 @@ "writeinfojson": true, "writesubtitles": false, "onlysubtitles": false, - "allsubtitles": false + "allsubtitles": false, + "listssubtitles": false } diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 94adc45552..30f2246dd9 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -43,6 +43,7 @@ def setUp(self): DL.params['allsubtitles'] = False DL.params['writesubtitles'] = False DL.params['subtitlesformat'] = 'srt' + DL.params['listsubtitles'] = False def test_youtube_no_subtitles(self): DL = FakeDownloader() DL.params['writesubtitles'] = False @@ -88,6 +89,12 @@ def test_youtube_subtitles_format(self): info_dict = IE.extract('QRS8MkLhQmM') sub = info_dict[0]['subtitles'][0] self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') + def test_youtube_list_subtitles(self): + DL = FakeDownloader() + DL.params['listsubtitles'] = True + IE = YoutubeIE(DL) + info_dict = IE.extract('QRS8MkLhQmM') + self.assertEqual(info_dict, None) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index a041e12199..164d25e54d 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -81,6 +81,7 @@ class FileDownloader(object): writesubtitles: Write the video subtitles to a file onlysubtitles: Downloads only the subtitles of the video allsubtitles: Downloads all the subtitles of the video + listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [sbv/srt] (default=srt) subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 62522bb6cf..ff1fab7734 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -214,11 +214,16 @@ def report_video_info_webpage_download(self, video_id): def report_video_subtitles_download(self, video_id): """Report attempt to download video info webpage.""" - self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id) + self._downloader.to_screen(u'[youtube] %s: Checking available subtitles' % video_id) - def report_video_subtitles_request(self, video_id, lang): + def report_video_subtitles_request(self, video_id, sub_lang, format): """Report attempt to download video info webpage.""" - self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for lang: %s' % (video_id,lang)) + self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format)) + + def report_video_subtitles_available(self, video_id, sub_lang_list): + """Report available subtitles.""" + sub_lang = ",".join(list(sub_lang_list.keys())) + self._downloader.to_screen(u'[youtube] %s: Available subtitles for video: %s' % (video_id, sub_lang)) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" @@ -233,6 +238,7 @@ def report_rtmp_download(self): self._downloader.to_screen(u'[youtube] RTMP download detected') def _get_available_subtitles(self, video_id): + self.report_video_subtitles_download(video_id) request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) try: sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') @@ -241,11 +247,15 @@ def _get_available_subtitles(self, video_id): sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) if not sub_lang_list: - return (u'WARNING: video has no closed captions', None) + return (u'WARNING: video doesn\'t have download', None) return sub_lang_list + def _list_available_subtitles(self, video_id): + sub_lang_list = self._get_available_subtitles(video_id) + self.report_video_subtitles_available(video_id, sub_lang_list) + def _request_subtitle(self, sub_lang, sub_name, video_id, format): - self.report_video_subtitles_request(video_id, sub_lang) + self.report_video_subtitles_request(video_id, sub_lang, format) params = compat_urllib_parse.urlencode({ 'lang': sub_lang, 'name': sub_name, @@ -262,7 +272,6 @@ def _request_subtitle(self, sub_lang, sub_name, video_id, format): return (None, sub_lang, sub) def _extract_subtitle(self, video_id): - self.report_video_subtitles_download(video_id) sub_lang_list = self._get_available_subtitles(video_id) sub_format = self._downloader.params.get('subtitlesformat') if self._downloader.params.get('subtitleslang', False): @@ -278,7 +287,6 @@ def _extract_subtitle(self, video_id): return [subtitle] def _extract_all_subtitles(self, video_id): - self.report_video_subtitles_download(video_id) sub_lang_list = self._get_available_subtitles(video_id) sub_format = self._downloader.params.get('subtitlesformat') subtitles = [] @@ -523,6 +531,10 @@ def _real_extract(self, url): if sub_error: self._downloader.trouble(sub_error) + if self._downloader.params.get('listsubtitles', False): + sub_lang_list = self._list_available_subtitles(video_id) + return + if 'length_seconds' not in video_info: self._downloader.trouble(u'WARNING: unable to extract video duration') video_duration = '' diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 914d030a37..e5a7469af2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -182,6 +182,9 @@ def _find_term_columns(): video_format.add_option('--all-subs', action='store_true', dest='allsubtitles', help='downloads all the available subtitles of the video (currently youtube only)', default=False) + video_format.add_option('--list-subs', + action='store_true', dest='listsubtitles', + help='lists all available subtitles for the video (currently youtube only)', default=False) video_format.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='LANG', help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') @@ -461,6 +464,7 @@ def _real_main(): 'writesubtitles': opts.writesubtitles, 'onlysubtitles': opts.onlysubtitles, 'allsubtitles': opts.allsubtitles, + 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, 'subtitleslang': opts.subtitleslang, 'matchtitle': decodeOption(opts.matchtitle), From c0ba10467457a58e7198b58793f3c4683b1c3ec7 Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Sat, 23 Feb 2013 16:24:59 +0100 Subject: [PATCH 07/11] Fixed typo in error message when no subtitles were available. --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ff1fab7734..ab8bd21045 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -247,7 +247,7 @@ def _get_available_subtitles(self, video_id): sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) if not sub_lang_list: - return (u'WARNING: video doesn\'t have download', None) + return (u'WARNING: video doesn\'t have subtitles', None) return sub_lang_list def _list_available_subtitles(self, video_id): From b9fc428494b22623529d364387b8693cc3cb1503 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Wed, 20 Mar 2013 11:29:07 +0100 Subject: [PATCH 08/11] add '--write-srt' and '--srt-lang' aliases for backwards compatibility --- youtube_dl/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e5a7469af2..c4f64893d4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -173,7 +173,7 @@ def _find_term_columns(): action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') video_format.add_option('-F', '--list-formats', action='store_true', dest='listformats', help='list all available formats (currently youtube only)') - video_format.add_option('--write-sub', + video_format.add_option('--write-sub', '--write-srt', action='store_true', dest='writesubtitles', help='write subtitle file (currently youtube only)', default=False) video_format.add_option('--only-sub', @@ -188,7 +188,7 @@ def _find_term_columns(): video_format.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='LANG', help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') - video_format.add_option('--sub-lang', + video_format.add_option('--sub-lang', '--srt-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') From f10b2a9c14db686e7f9b7d050f41b26d5cc35e01 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Wed, 20 Mar 2013 12:13:52 +0100 Subject: [PATCH 09/11] fix KeekIE --- youtube_dl/InfoExtractors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 14fd644a28..835428f323 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3986,11 +3986,11 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) m = re.search(r'[\s\n]+

(?P\w+)

', webpage) - uploader = unescapeHTML(m.group('uploader')) + m = re.search(r'
[\S\s]+?

(?P.+?)

', webpage) + uploader = clean_html(m.group('uploader')) info = { - 'id':video_id, - 'url':video_url, + 'id': video_id, + 'url': video_url, 'ext': 'mp4', 'title': title, 'thumbnail': thumbnail, From 1ee97784052d9f57ec618164a2a4c502186d93b2 Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Wed, 27 Mar 2013 15:57:11 -0400 Subject: [PATCH 10/11] Use sys.stdout.buffer instead of sys.stdout sys.stdout defaults to text mode, we need to use the underlying buffer instead when writing binary data. Signed-off-by: Chirantan Ekbote --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 95bd948438..901b5b5ad2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -329,7 +329,7 @@ def sanitize_open(filename, open_mode): if sys.platform == 'win32': import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) - return (sys.stdout, filename) + return (sys.stdout.buffer, filename) stream = open(encodeFilename(filename), open_mode) return (stream, filename) except (IOError, OSError) as err: From 898280a056b577c64005647cae68caf8f16ca059 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Thu, 28 Mar 2013 13:13:03 +0100 Subject: [PATCH 11/11] use sys.stdout.buffer only on Python3 --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 901b5b5ad2..49af7d7c08 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -329,7 +329,7 @@ def sanitize_open(filename, open_mode): if sys.platform == 'win32': import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) - return (sys.stdout.buffer, filename) + return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) stream = open(encodeFilename(filename), open_mode) return (stream, filename) except (IOError, OSError) as err: