From 0f06bcd7591332937fdec497d6cbb4914358bc79 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 9 May 2022 17:24:28 +0530
Subject: [PATCH] [cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`. Closes #3684
* Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700
* Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38
* [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode`
* [utils] LazyList: Expose unnecessarily "protected" attributes and other minor cleanup
---
 Makefile                               |  2 +-
 devscripts/make_readme.py              |  2 +-
 devscripts/make_supportedsites.py      |  5 +-
 devscripts/update-formulae.py          |  2 +-
 test/helper.py                         |  2 +-
 test/test_InfoExtractor.py             |  8 +--
 test/test_YoutubeDLCookieJar.py        |  2 +-
 test/test_aes.py                       |  8 +--
 test/test_compat.py                    |  2 +-
 test/test_http.py                      |  8 +--
 test/test_socks.py                     | 10 +--
 test/test_subtitles.py                 |  2 +-
 test/test_update.py.disabled           |  2 +-
 test/test_utils.py                     |  2 +-
 yt_dlp/YoutubeDL.py                    | 16 ++---
 yt_dlp/aes.py                          |  2 +-
 yt_dlp/cookies.py                      | 20 +++---
 yt_dlp/downloader/external.py          |  2 +-
 yt_dlp/downloader/f4m.py               |  2 +-
 yt_dlp/downloader/hls.py               |  4 +-
 yt_dlp/downloader/http.py              |  6 +-
 yt_dlp/downloader/ism.py               |  2 +-
 yt_dlp/downloader/mhtml.py             |  4 +-
 yt_dlp/downloader/niconico.py          |  2 +-
 yt_dlp/downloader/websocket.py         |  2 +-
 yt_dlp/downloader/youtube_live_chat.py |  6 +-
 yt_dlp/extractor/dplay.py              |  3 +-
 yt_dlp/extractor/generic.py            | 14 -----
 yt_dlp/extractor/youtube.py            |  5 +-
 yt_dlp/postprocessor/common.py         |  8 +--
 yt_dlp/postprocessor/xattrpp.py        |  2 +-
 yt_dlp/socks.py                        | 10 +--
 yt_dlp/update.py                       |  4 +-
 yt_dlp/utils.py                        | 87 +++++++++++++-------------
 yt_dlp/webvtt.py                       |  2 +-
 35 files changed, 124 insertions(+), 136 deletions(-)

diff --git a/Makefile b/Makefile
index 179aaff57d..7fa4a6d46c 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,7 @@ PYTHON ?= /usr/bin/env python3
 SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
 
 # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
-MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
+MARKDOWN = $(shell if [ "$(pandoc -v | head -n1 | cut -d" " -f2 | head -c1)" = "2" ]; then echo markdown-smart; else echo markdown; fi)
 
 install: lazy-extractors yt-dlp yt-dlp.1 completions
 	mkdir -p $(DESTDIR)$(BINDIR)
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 1401c2e5a7..fd234bf58f 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -14,7 +14,7 @@
 helptext = sys.stdin.read()
 
 if isinstance(helptext, bytes):
-    helptext = helptext.decode('utf-8')
+    helptext = helptext.decode()
 
 start, end = helptext.index(f'\n  {OPTIONS_START}'), helptext.index(f'\n{EPILOG_START}')
 options = re.sub(r'(?m)^  (\w.+)$', r'## \1', helptext[start + 1: end + 1])
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 0a0d08f56f..0403c1ae63 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -3,9 +3,8 @@
 import os
 import sys
 
-# Import yt_dlp
-ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
-sys.path.insert(0, ROOT_DIR)
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import yt_dlp
 
 
diff --git a/devscripts/update-formulae.py b/devscripts/update-formulae.py
index 6424f5d9bc..a89872c7b9 100644
--- a/devscripts/update-formulae.py
+++ b/devscripts/update-formulae.py
@@ -17,7 +17,7 @@
 
 pypi_release = json.loads(compat_urllib_request.urlopen(
     'https://pypi.org/pypi/yt-dlp/%s/json' % normalized_version
-).read().decode('utf-8'))
+).read().decode())
 
 tarball_file = next(x for x in pypi_release['urls'] if x['filename'].endswith('.tar.gz'))
diff --git a/test/helper.py b/test/helper.py
index 81e53ed74c..2333ace988 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -92,7 +92,7 @@ def gettestcases(include_onlymatching=False):
         yield from ie.get_testcases(include_onlymatching)
 
 
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
 
 
 def expect_value(self, got, expected, field):
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 173b629201..257ea7dd35 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1360,7 +1360,7 @@ def test_parse_mpd_formats(self):
         for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
             with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f:
                 formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
-                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    compat_etree_fromstring(f.read().encode()),
                     mpd_base_url=mpd_base_url, mpd_url=mpd_url)
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
@@ -1551,7 +1551,7 @@ def test_parse_ism_formats(self):
         for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
             with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f:
                 formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
-                    compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url)
+                    compat_etree_fromstring(f.read().encode()), ism_url=ism_url)
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
                 expect_value(self, subtitles, expected_subtitles, None)
@@ -1577,7 +1577,7 @@ def test_parse_f4m_formats(self):
         for f4m_file, f4m_url, expected_formats in _TEST_CASES:
             with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f:
                 formats = self.ie._parse_f4m_formats(
-                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    compat_etree_fromstring(f.read().encode()),
                     f4m_url, None)
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
@@ -1624,7 +1624,7 @@ def test_parse_xspf(self):
         for xspf_file, xspf_url, expected_entries in _TEST_CASES:
             with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f:
                 entries = self.ie._parse_xspf(
-                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    compat_etree_fromstring(f.read().encode()),
                     xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
                 expect_value(self, entries, expected_entries, None)
                 for i in range(len(entries)):
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index 13a4569b24..6280e1f2cb 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -17,7 +17,7 @@ def test_keep_session_cookies(self):
         tf = tempfile.NamedTemporaryFile(delete=False)
         try:
             cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
-            temp = tf.read().decode('utf-8')
+            temp = tf.read().decode()
             self.assertTrue(re.search(
                 r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
             self.assertTrue(re.search(
diff --git a/test/test_aes.py b/test/test_aes.py
index c934104e35..2b7b7cf542 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -81,19 +81,19 @@ def test_gcm_decrypt(self):
         self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
 
     def test_decrypt_text(self):
-        password = intlist_to_bytes(self.key).decode('utf-8')
+        password = intlist_to_bytes(self.key).decode()
         encrypted = base64.b64encode(
             intlist_to_bytes(self.iv[:8])
             + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
-        ).decode('utf-8')
+        ).decode()
         decrypted = (aes_decrypt_text(encrypted, password, 16))
         self.assertEqual(decrypted, self.secret_msg)
 
-        password = intlist_to_bytes(self.key).decode('utf-8')
+        password = intlist_to_bytes(self.key).decode()
         encrypted = base64.b64encode(
             intlist_to_bytes(self.iv[:8])
             + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
-        ).decode('utf-8')
+        ).decode()
         decrypted = (aes_decrypt_text(encrypted, password, 32))
         self.assertEqual(decrypted, self.secret_msg)
 
diff --git a/test/test_compat.py b/test/test_compat.py
index 9b185853da..224175c658 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -90,7 +90,7 @@ def test_compat_etree_fromstring(self):
                 <foo><bar>spam</bar></foo>
             </root>
         '''
-        doc = compat_etree_fromstring(xml.encode('utf-8'))
+        doc = compat_etree_fromstring(xml.encode())
         self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
         self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
         self.assertTrue(isinstance(doc.find('normal').text, compat_str))
diff --git a/test/test_http.py b/test/test_http.py
index fb8c9f4e9d..664e09ace1 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -140,7 +140,7 @@ def do_GET(self):
             self.send_response(200)
             self.send_header('Content-Type', 'text/plain; charset=utf-8')
             self.end_headers()
-            self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+            self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
     return HTTPTestRequestHandler
 
 
@@ -167,12 +167,12 @@ def test_proxy(self):
             'geo_verification_proxy': geo_proxy,
         })
         url = 'http://foo.com/bar'
-        response = ydl.urlopen(url).read().decode('utf-8')
+        response = ydl.urlopen(url).read().decode()
         self.assertEqual(response, f'normal: {url}')
 
         req = compat_urllib_request.Request(url)
         req.add_header('Ytdl-request-proxy', geo_proxy)
-        response = ydl.urlopen(req).read().decode('utf-8')
+        response = ydl.urlopen(req).read().decode()
         self.assertEqual(response, f'geo: {url}')
 
     def test_proxy_with_idn(self):
@@ -180,7 +180,7 @@ def test_proxy_with_idn(self):
             'proxy': f'127.0.0.1:{self.port}',
         })
         url = 'http://中文.tw/'
-        response = ydl.urlopen(url).read().decode('utf-8')
+        response = ydl.urlopen(url).read().decode()
         # b'xn--fiq228c' is '中文'.encode('idna')
         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
 
diff --git a/test/test_socks.py b/test/test_socks.py
index 546f0d73dc..a8b068cddb 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -32,7 +32,7 @@ def test_proxy_http(self):
             'proxy': params['primary_proxy']
         })
         self.assertEqual(
-            ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'),
+            ydl.urlopen('http://yt-dl.org/ip').read().decode(),
             params['primary_server_ip'])
 
     def test_proxy_https(self):
@@ -43,7 +43,7 @@ def test_proxy_https(self):
             'proxy': params['primary_proxy']
         })
         self.assertEqual(
-            ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'),
+            ydl.urlopen('https://yt-dl.org/ip').read().decode(),
             params['primary_server_ip'])
 
     def test_secondary_proxy_http(self):
@@ -54,7 +54,7 @@ def test_secondary_proxy_http(self):
         req = compat_urllib_request.Request('http://yt-dl.org/ip')
         req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
         self.assertEqual(
-            ydl.urlopen(req).read().decode('utf-8'),
+            ydl.urlopen(req).read().decode(),
             params['secondary_server_ip'])
 
     def test_secondary_proxy_https(self):
@@ -65,7 +65,7 @@ def test_secondary_proxy_https(self):
         req = compat_urllib_request.Request('https://yt-dl.org/ip')
         req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
         self.assertEqual(
-            ydl.urlopen(req).read().decode('utf-8'),
+            ydl.urlopen(req).read().decode(),
             params['secondary_server_ip'])
 
 
@@ -96,7 +96,7 @@ def _get_ip(self, protocol):
         ydl = FakeYDL({
             'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
         })
-        return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8')
+        return ydl.urlopen('http://yt-dl.org/ip').read().decode()
 
     def test_socks4(self):
         self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 362b67ceff..182bd7a4b1 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -51,7 +51,7 @@ def getSubtitles(self):
         for sub_info in subtitles.values():
             if sub_info.get('data') is None:
                 uf = self.DL.urlopen(sub_info['url'])
-                sub_info['data'] = uf.read().decode('utf-8')
+                sub_info['data'] = uf.read().decode()
         return {l: sub_info['data'] for l, sub_info in subtitles.items()}
 
 
diff --git a/test/test_update.py.disabled b/test/test_update.py.disabled
index 389b8ffe5f..73b55cdace 100644
--- a/test/test_update.py.disabled
+++ b/test/test_update.py.disabled
@@ -21,7 +21,7 @@ class TestUpdate(unittest.TestCase):
         signature = versions_info['signature']
         del versions_info['signature']
         self.assertTrue(rsa_verify(
-            json.dumps(versions_info, sort_keys=True).encode('utf-8'),
+            json.dumps(versions_info, sort_keys=True).encode(),
             signature, UPDATES_RSA_KEY))
 
 
diff --git a/test/test_utils.py b/test/test_utils.py
index 5e220087b5..184c39cff2 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1759,7 +1759,7 @@ def test_LazyList_laziness(self):
 
         def test(ll, idx, val, cache):
             self.assertEqual(ll[idx], val)
-            self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache))
+            self.assertEqual(ll._cache, list(cache))
 
         ll = LazyList(range(10))
         test(ll, 0, 0, range(1))
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index de34b8bd7c..f9670429a9 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -773,9 +773,9 @@ def _bidi_workaround(self, message):
         assert hasattr(self, '_output_process')
         assert isinstance(message, compat_str)
         line_count = message.count('\n') + 1
-        self._output_process.stdin.write((message + '\n').encode('utf-8'))
+        self._output_process.stdin.write((message + '\n').encode())
         self._output_process.stdin.flush()
-        res = ''.join(self._output_channel.readline().decode('utf-8')
+        res = ''.join(self._output_channel.readline().decode()
                       for _ in range(line_count))
         return res[:-len('\n')]
 
@@ -1181,7 +1181,7 @@ def create_key(outer_mobj):
                 value = map(str, variadic(value) if '#' in flags else [value])
                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
             elif fmt[-1] == 'B':  # bytes
-                value = f'%{str_fmt}'.encode() % str(value).encode('utf-8')
+                value = f'%{str_fmt}'.encode() % str(value).encode()
                 value, fmt = value.decode('utf-8', 'ignore'), 's'
             elif fmt[-1] == 'U':  # unicode normalized
                 value, fmt = unicodedata.normalize(
@@ -2243,7 +2243,7 @@ def final_selector(ctx):
                 return selector_function(ctx_copy)
             return final_selector
 
-        stream = io.BytesIO(format_spec.encode('utf-8'))
+        stream = io.BytesIO(format_spec.encode())
         try:
             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
         except tokenize.TokenError:
@@ -3194,8 +3194,8 @@ def ffmpeg_fixup(cndn, msg, cls):
 
             downloader = downloader.__name__ if downloader else None
             if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
-                live_fixup = info_dict.get('is_live') and not self.params.get('hls_use_mpegts')
-                ffmpeg_fixup(downloader == 'HlsFD' or live_fixup,
+                fixup_live = info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None
+                ffmpeg_fixup(downloader == 'HlsFD' or fixup_live,
                              'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                              FFmpegFixupM3u8PP)
                 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
@@ -3700,10 +3700,10 @@ def python_implementation():
 
         # Not implemented
         if False and self.params.get('call_home'):
-            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
             write_debug('Public IP address: %s' % ipaddr)
             latest_version = self.urlopen(
-                'https://yt-dl.org/latest/version').read().decode('utf-8')
+                'https://yt-dl.org/latest/version').read().decode()
             if version_tuple(latest_version) > version_tuple(__version__):
                 self.report_warning(
                     'You are using an outdated version (newest version: %s)! '
diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py
index ba3baf3ded..d0e6d7549f 100644
--- a/yt_dlp/aes.py
+++ b/yt_dlp/aes.py
@@ -265,7 +265,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
     NONCE_LENGTH_BYTES = 8
 
     data = bytes_to_intlist(compat_b64decode(data))
-    password = bytes_to_intlist(password.encode('utf-8'))
+    password = bytes_to_intlist(password.encode())
 
     key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
     key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 621c91e86a..b06edfc5d6 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -283,10 +283,10 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
 
 
 def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
-    host_key = host_key.decode('utf-8')
-    name = name.decode('utf-8')
-    value = value.decode('utf-8')
-    path = path.decode('utf-8')
+    host_key = host_key.decode()
+    name = name.decode()
+    value = value.decode()
+    path = path.decode()
     is_encrypted = not value and encrypted_value
 
     if is_encrypted:
@@ -458,7 +458,7 @@ def decrypt(self, encrypted_value):
         self._cookie_counts['other'] += 1
         # any other prefix means the data is DPAPI encrypted
         # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
-        return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')
+        return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
 
 
 def _extract_safari_cookies(profile, logger):
@@ -521,7 +521,7 @@ def read_cstring(self):
         while True:
             c = self.read_bytes(1)
             if c == b'\x00':
-                return b''.join(buffer).decode('utf-8')
+                return b''.join(buffer).decode()
             else:
                 buffer.append(c)
 
@@ -735,7 +735,7 @@ def _get_kwallet_network_wallet(logger):
             logger.warning('failed to read NetworkWallet')
             return default_wallet
         else:
-            network_wallet = stdout.decode('utf-8').strip()
+            network_wallet = stdout.decode().strip()
             logger.debug(f'NetworkWallet = "{network_wallet}"')
             return network_wallet
     except Exception as e:
@@ -873,7 +873,7 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
 def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
     plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
     try:
-        return plaintext.decode('utf-8')
+        return plaintext.decode()
     except UnicodeDecodeError:
         logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
         return None
@@ -887,7 +887,7 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
         return None
 
     try:
-        return plaintext.decode('utf-8')
+        return plaintext.decode()
     except UnicodeDecodeError:
         logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
         return None
@@ -939,7 +939,7 @@ def _open_database_copy(database_path, tmpdir):
 
 def _get_column_names(cursor, table_name):
     table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
-    return [row[1].decode('utf-8') for row in table_info]
+    return [row[1].decode() for row in table_info]
 
 
 def _find_most_recently_used_file(root, filename, logger):
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 4f9f8f6e55..85c6a6977b 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -299,7 +299,7 @@ def _make_cmd(self, tmpfilename, info_dict):
                 fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
                 url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
             stream, _ = self.sanitize_open(url_list_file, 'wb')
-            stream.write('\n'.join(url_list).encode('utf-8'))
+            stream.write('\n'.join(url_list).encode())
             stream.close()
             cmd += ['-i', url_list_file]
         else:
diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py
index 12ecec0082..7b6665167b 100644
--- a/yt_dlp/downloader/f4m.py
+++ b/yt_dlp/downloader/f4m.py
@@ -412,7 +412,7 @@ def real_download(self, filename, info_dict):
                     if box_type == b'mdat':
                         self._append_fragment(ctx, box_data)
                         break
-            except (compat_urllib_error.HTTPError, ) as err:
+            except compat_urllib_error.HTTPError as err:
                 if live and (err.code == 404 or err.code == 410):
                     # We didn't keep up with the live window. Continue
                     # with the next available fragment.
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index f65f91f4fb..2e01c7bac5 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -329,7 +329,7 @@ def pack_fragment(frag_content, frag_index):
                         continue
                     block.write_into(output)
 
-                return output.getvalue().encode('utf-8')
+                return output.getvalue().encode()
 
             def fin_fragments():
                 dedup_window = extra_state.get('webvtt_dedup_window')
@@ -340,7 +340,7 @@ def fin_fragments():
                 for cue in dedup_window:
                     webvtt.CueBlock.from_json(cue).write_into(output)
 
-                return output.getvalue().encode('utf-8')
+                return output.getvalue().encode()
 
             self.download_and_append_fragments(
                 ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index d590dbfbd7..9b7598b1cf 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -150,7 +150,7 @@ def establish_connection():
                     ctx.resume_len = 0
                     ctx.open_mode = 'wb'
                     ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
-            except (compat_urllib_error.HTTPError, ) as err:
+            except compat_urllib_error.HTTPError as err:
                 if err.code == 416:
                     # Unable to resume (requested range not satisfiable)
                     try:
@@ -158,7 +158,7 @@ def establish_connection():
                         ctx.data = self.ydl.urlopen(
                             sanitized_Request(url, request_data, headers))
                         content_length = ctx.data.info()['Content-Length']
-                    except (compat_urllib_error.HTTPError, ) as err:
+                    except compat_urllib_error.HTTPError as err:
                         if err.code < 500 or err.code >= 600:
                             raise
                         else:
@@ -268,7 +268,7 @@ def retry(e):
 
             if self.params.get('xattr_set_filesize', False) and data_len is not None:
                 try:
-                    write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+                    write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
                 except (XAttrUnavailableError, XAttrMetadataError) as err:
                     self.report_error('unable to set filesize xattr: %s' % str(err))
 
diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index 82ed51e889..0aaba8c153 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -151,7 +151,7 @@ def write_piff_header(stream, params):
         sample_entry_payload += u16.pack(0x18)  # depth
         sample_entry_payload += s16.pack(-1)  # pre defined
 
-        codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
+        codec_private_data = binascii.unhexlify(params['codec_private_data'].encode())
         if fourcc in ('H264', 'AVC1'):
             sps, pps = codec_private_data.split(u32.pack(1))[1:]
             avcc_payload = u8.pack(1)  # configuration version
diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py
index 8a6619960d..f999fca781 100644
--- a/yt_dlp/downloader/mhtml.py
+++ b/yt_dlp/downloader/mhtml.py
@@ -54,7 +54,7 @@ class MhtmlFD(FragmentFD):
     def _escape_mime(s):
         return '=?utf-8?Q?' + (b''.join(
             bytes((b,)) if b >= 0x20 else b'=%02X' % b
-            for b in quopri.encodestring(s.encode('utf-8'), header=True)
+            for b in quopri.encodestring(s.encode(), header=True)
         )).decode('us-ascii') + '?='
 
     def _gen_cid(self, i, fragment, frag_boundary):
@@ -151,7 +151,7 @@ def real_download(self, filename, info_dict):
                 length=len(stub),
                 title=self._escape_mime(title),
                 stub=stub
-            ).encode('utf-8'))
+            ).encode())
             extra_state['header_written'] = True
 
         for i, fragment in enumerate(fragments):
diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py
index 0e6c177b73..5947446b14 100644
--- a/yt_dlp/downloader/niconico.py
+++ b/yt_dlp/downloader/niconico.py
@@ -51,4 +51,4 @@ def heartbeat():
                 with heartbeat_lock:
                     timer[0].cancel()
                     download_complete = True
-        return success
\ No newline at end of file
+        return success
diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py
index eb1b99b456..727a15828e 100644
--- a/yt_dlp/downloader/websocket.py
+++ b/yt_dlp/downloader/websocket.py
@@ -19,7 +19,7 @@ def real_download(self, filename, info_dict):
         async def call_conn(proc, stdin):
             try:
                 await self.real_connection(stdin, info_dict)
-            except (BrokenPipeError, OSError):
+            except OSError:
                 pass
             finally:
                 with contextlib.suppress(OSError):
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
index 7f06dfb487..4486607255 100644
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -47,7 +47,7 @@ def parse_actions_replay(live_chat_continuation):
                     replay_chat_item_action = action['replayChatItemAction']
                     offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                 processed_fragment.extend(
-                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
             if offset is not None:
                 continuation = try_get(
                     live_chat_continuation,
@@ -89,7 +89,7 @@ def parse_actions_live(live_chat_continuation):
                         'isLive': True,
                     }
                     processed_fragment.extend(
-                        json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+                        json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
             continuation_data_getters = [
                 lambda x: x['continuations'][0]['invalidationContinuationData'],
                 lambda x: x['continuations'][0]['timedContinuationData'],
@@ -183,7 +183,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
                     request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                 headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                 headers.update({'content-type': 'application/json'})
-                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                 success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                     url, frag_index, fragment_request_data, headers)
             else:
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 54f95a44af..5c4f3c8923 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -8,6 +8,7 @@
     ExtractorError,
     float_or_none,
     int_or_none,
+    remove_start,
     strip_or_none,
     try_get,
     unified_timestamp,
@@ -311,7 +312,7 @@ class DPlayIE(DPlayBaseIE):
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         display_id = mobj.group('id')
-        domain = mobj.group('domain').lstrip('www.')
+        domain = remove_start(mobj.group('domain'), 'www.')
         country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
         host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
         return self._get_disco_api_info(
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 340161a421..0d0e002e50 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1030,20 +1030,6 @@ class GenericIE(InfoExtractor):
                 'filesize': 24687186,
             },
         },
-        {
-            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
-            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
-            'info_dict': {
-                'id': 'uxjb0lwrcz',
-                'ext': 'mp4',
-                'title': 'Conversation about Hexagonal Rails Part 1',
-                'description': 'a Martin Fowler video from ThoughtWorks',
-                'duration': 1715.0,
-                'uploader': 'thoughtworks.wistia.com',
-                'timestamp': 1401832161,
-                'upload_date': '20140603',
-            },
-        },
         # Wistia standard embed (async)
         {
             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1c6e205104..907b079ec4 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3173,7 +3173,8 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
             # Eg: __2ABJjxzNo, ySuUZEjARPY
             is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
             if is_damaged:
-                self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
+                self.report_warning(
+                    f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
             dct = {
                 'asr': int_or_none(fmt.get('audioSampleRate')),
                 'filesize': int_or_none(fmt.get('contentLength')),
@@ -3222,6 +3223,8 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
         skip_manifests = self._configuration_arg('skip')
         if not self.get_param('youtube_include_hls_manifest', True):
             skip_manifests.append('hls')
+        if not self.get_param('youtube_include_dash_manifest', True):
+            skip_manifests.append('dash')
         get_dash = 'dash' not in skip_manifests and (
             not is_live or live_from_start or self._configuration_arg('include_live_dash'))
         get_hls = not live_from_start and 'hls' not in skip_manifests
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
index 1d11e82a25..addc46e5be 100644
--- a/yt_dlp/postprocessor/common.py
+++ b/yt_dlp/postprocessor/common.py
@@ -93,10 +93,10 @@ def write_debug(self, text, *args, **kwargs):
         return self._downloader.write_debug(text, *args, **kwargs)
 
     def _delete_downloaded_files(self, *files_to_delete, **kwargs):
-        if not self._downloader:
-            for filename in set(filter(None, files_to_delete)):
-                os.remove(filename)
-        return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs)
+        if self._downloader:
+            return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs)
+        for filename in set(filter(None, files_to_delete)):
+            os.remove(filename)
 
     def get_param(self, name, default=None, *args, **kwargs):
         if self._downloader:
diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py
index 065ddf9632..f822eff41c 100644
--- a/yt_dlp/postprocessor/xattrpp.py
+++ b/yt_dlp/postprocessor/xattrpp.py
@@ -43,7 +43,7 @@ def run(self, info):
                 if value:
                     if infoname == 'upload_date':
                         value = hyphenate_date(value)
-                    write_xattr(info['filepath'], xattrname, value.encode('utf-8'))
+                    write_xattr(info['filepath'], xattrname, value.encode())
 
         except XAttrUnavailableError as e:
             raise PostProcessingError(str(e))
diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py
index 56fab08ab3..34ba1394ab 100644
--- a/yt_dlp/socks.py
+++ b/yt_dlp/socks.py
@@ -149,11 +149,11 @@ def _setup_socks4(self, address, is_4a=False):
 
         packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
 
-        username = (self._proxy.username or '').encode('utf-8')
+        username = (self._proxy.username or '').encode()
         packet += username + b'\x00'
 
         if is_4a and self._proxy.remote_dns:
-            packet += destaddr.encode('utf-8') + b'\x00'
+            packet += destaddr.encode() + b'\x00'
 
         self.sendall(packet)
 
@@ -192,8 +192,8 @@ def _socks5_auth(self):
             raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
 
         if method == Socks5Auth.AUTH_USER_PASS:
-            username = self._proxy.username.encode('utf-8')
-            password = self._proxy.password.encode('utf-8')
+            username = self._proxy.username.encode()
+            password = self._proxy.password.encode()
             packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION)
             packet += self._len_and_data(username) + self._len_and_data(password)
             self.sendall(packet)
@@ -216,7 +216,7 @@ def _setup_socks5(self, address):
         reserved = 0
         packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
         if ipaddr is None:
-            destaddr = destaddr.encode('utf-8')
+            destaddr = destaddr.encode()
             packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
             packet += self._len_and_data(destaddr)
         else:
diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index eea08ce43e..8dcf260f5c 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -74,7 +74,7 @@ def calc_sha256sum(path):
 
     # Download and check versions info
    try:
-        version_info = ydl._opener.open(JSON_URL).read().decode('utf-8')
+        version_info = ydl._opener.open(JSON_URL).read().decode()
         version_info = json.loads(version_info)
     except Exception:
         return report_network_error('obtain version info', delim='; Please try again later or')
@@ -118,7 +118,7 @@ def get_sha256sum(bin_or_exe, version):
             {}).get('browser_download_url')
         if not urlh:
             return None
-        hash_data = ydl._opener.open(urlh).read().decode('utf-8')
+        hash_data = ydl._opener.open(urlh).read().decode()
         return dict(ln.split()[::-1] for ln in hash_data.splitlines()).get(filename)
 
     if not os.access(filename, os.W_OK):
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 8b2c1c75a7..62dc412a85 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -737,8 +737,8 @@ def extract_basic_auth(url):
         parts.hostname if parts.port is None
         else '%s:%d' % (parts.hostname, parts.port))))
     auth_payload = base64.b64encode(
-        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
-    return url, 'Basic ' + auth_payload.decode('utf-8')
+        ('%s:%s' % (parts.username, parts.password or '')).encode())
+    return url, f'Basic {auth_payload.decode()}'
 
 
 def sanitized_Request(url, *args, **kwargs):
@@ -1339,7 +1339,7 @@ def http_response(self, req, resp):
         location = resp.headers.get('Location')
         if location:
             # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
-            location = location.encode('iso-8859-1').decode('utf-8')
+            location = location.encode('iso-8859-1').decode()
             location_escaped = escape_url(location)
             if location != location_escaped:
                 del resp.headers['Location']
@@ -2309,7 +2309,7 @@ def setproctitle(title):
         # a bytestring, but since unicode_literals turns
         # every string into a unicode string, it fails.
         return
-    title_bytes = title.encode('utf-8')
+    title_bytes = title.encode()
     buf = ctypes.create_string_buffer(len(title_bytes))
     buf.value = title_bytes
     try:
@@ -2351,13 +2351,13 @@ def base_url(url):
 
 def urljoin(base, path):
     if isinstance(path, bytes):
-        path = path.decode('utf-8')
+        path = path.decode()
     if not isinstance(path, compat_str) or not path:
         return None
     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
         return path
     if isinstance(base, bytes):
-        base = base.decode('utf-8')
+        base = base.decode()
     if not isinstance(base, compat_str) or not re.match(
             r'^(?:https?:)?//', base):
         return None
@@ -2557,49 +2557,48 @@ def get_exe_version(exe, args=['--version'],
 
 
 class LazyList(collections.abc.Sequence):
-    ''' Lazy immutable list from an iterable
-    Note that slices of a LazyList are lists and not LazyList'''
+    """Lazy immutable list from an iterable
+    Note that slices of a LazyList are lists and not LazyList"""
 
     class IndexError(IndexError):
         pass
 
     def __init__(self, iterable, *, reverse=False, _cache=None):
-        self.__iterable = iter(iterable)
-        self.__cache = [] if _cache is None else _cache
-        self.__reversed = reverse
+        self._iterable = iter(iterable)
+        self._cache = [] if _cache is None else _cache
+        self._reversed = reverse
 
     def __iter__(self):
-        if self.__reversed:
+        if self._reversed:
             # We need to consume the entire iterable to iterate in reverse
             yield from self.exhaust()
             return
-        yield from self.__cache
-        for item in self.__iterable:
-            self.__cache.append(item)
+        yield from self._cache
+        for item in self._iterable:
+            self._cache.append(item)
             yield item
 
-    def __exhaust(self):
-        self.__cache.extend(self.__iterable)
-        # Discard the emptied iterable to make it pickle-able
-        self.__iterable = []
-        return self.__cache
+    def _exhaust(self):
+        self._cache.extend(self._iterable)
+        self._iterable = []  # Discard the emptied iterable to make it pickle-able
+        return self._cache
 
     def exhaust(self):
-        ''' Evaluate the entire iterable '''
-        return self.__exhaust()[::-1 if self.__reversed else 1]
+        """Evaluate the entire iterable"""
+        return self._exhaust()[::-1 if self._reversed else 1]
 
     @staticmethod
-    def __reverse_index(x):
+    def _reverse_index(x):
         return None if x is None else -(x + 1)
 
     def __getitem__(self, idx):
         if isinstance(idx, slice):
-            if self.__reversed:
-                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
+            if self._reversed:
+                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
             start, stop, step = idx.start, idx.stop, idx.step or 1
         elif isinstance(idx, int):
-            if self.__reversed:
-                idx = self.__reverse_index(idx)
+            if self._reversed:
+                idx = self._reverse_index(idx)
             start, stop, step = idx, idx, 0
         else:
             raise TypeError('indices must be integers or slices')
@@ -2608,35 +2607,35 @@ def __getitem__(self, idx):
                 or (stop is None and step > 0)):
             # We need to consume the entire iterable to be able to slice from the end
             # Obviously, never use this with infinite iterables
-            self.__exhaust()
+            self._exhaust()
             try:
-                return self.__cache[idx]
+                return self._cache[idx]
             except IndexError as e:
                 raise self.IndexError(e) from e
-        n = max(start or 0, stop or 0) - len(self.__cache) + 1
+        n = max(start or 0, stop or 0) - len(self._cache) + 1
         if n > 0:
-            self.__cache.extend(itertools.islice(self.__iterable, n))
+            self._cache.extend(itertools.islice(self._iterable, n))
         try:
-            return self.__cache[idx]
+            return self._cache[idx]
         except IndexError as e:
             raise self.IndexError(e) from e
 
     def __bool__(self):
         try:
-            self[-1] if self.__reversed else self[0]
+            self[-1] if self._reversed else self[0]
         except self.IndexError:
             return False
         return True
 
     def __len__(self):
-        self.__exhaust()
-        return len(self.__cache)
+        self._exhaust()
+        return len(self._cache)
 
     def __reversed__(self):
-        return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
+        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
 
     def __copy__(self):
-        return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
+        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
 
     def __repr__(self):
         # repr and str should mimic a list. So we exhaust the iterable
@@ -2850,9 +2849,9 @@ def _multipart_encode_impl(data, boundary):
     for k, v in data.items():
         out += b'--' + boundary.encode('ascii') + b'\r\n'
         if isinstance(k, compat_str):
-            k = k.encode('utf-8')
+            k = k.encode()
         if isinstance(v, compat_str):
-            v = v.encode('utf-8')
+            v = v.encode()
         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
@@ -4741,7 +4740,7 @@ def write_xattr(path, key, value):
                 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
                 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
 
-        value = value.decode('utf-8')
+        value = value.decode()
         try:
             p = Popen(
                 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
@@ -4820,7 +4819,7 @@ def iri_to_uri(iri):
             net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
         net_location += '@'
 
-    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
+    net_location += iri_parts.hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
     # The 'idna' encoding produces ASCII text.
     if iri_parts.port is not None and iri_parts.port != 80:
         net_location += ':' + str(iri_parts.port)
@@ -5063,9 +5062,9 @@ def jwt_encode_hs256(payload_data, key, headers={}):
     }
     if headers:
         header_data.update(headers)
-    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
-    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
-    h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
+    header_b64 = base64.b64encode(json.dumps(header_data).encode())
+    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
+    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
     signature_b64 = base64.b64encode(h.digest())
     token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
     return token
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
index 4c222ba8e1..b8974f8831 100644
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@@ -346,7 +346,7 @@ def parse_fragment(frag_content):
     a bytes object containing the raw contents of
     a WebVTT file.
     """
-    parser = _MatchParser(frag_content.decode('utf-8'))
+    parser = _MatchParser(frag_content.decode())
 
     yield Magic.parse(parser)