mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-23 19:35:19 +01:00
13a10d5aa3
This is not actually a compatibility routine but rather a workaround for URLs that do not specify a protocol. A protocol-less URL is treated as an HTTP one, since that is the most probable scenario and the server will most likely redirect to HTTPS if HTTPS was actually expected. This routine could also be useful for any Request preprocessing that may be added in the future.
590 lines
19 KiB
Python
590 lines
19 KiB
Python
from __future__ import unicode_literals
|
|
|
|
import binascii
|
|
import collections
|
|
import email
|
|
import getpass
|
|
import io
|
|
import optparse
|
|
import os
|
|
import re
|
|
import shlex
|
|
import shutil
|
|
import socket
|
|
import subprocess
|
|
import sys
|
|
import itertools
|
|
import xml.etree.ElementTree
|
|
|
|
|
|
try:
|
|
import urllib.request as compat_urllib_request
|
|
except ImportError: # Python 2
|
|
import urllib2 as compat_urllib_request
|
|
|
|
try:
|
|
import urllib.error as compat_urllib_error
|
|
except ImportError: # Python 2
|
|
import urllib2 as compat_urllib_error
|
|
|
|
try:
|
|
import urllib.parse as compat_urllib_parse
|
|
except ImportError: # Python 2
|
|
import urllib as compat_urllib_parse
|
|
|
|
try:
|
|
from urllib.parse import urlparse as compat_urllib_parse_urlparse
|
|
except ImportError: # Python 2
|
|
from urlparse import urlparse as compat_urllib_parse_urlparse
|
|
|
|
try:
|
|
import urllib.parse as compat_urlparse
|
|
except ImportError: # Python 2
|
|
import urlparse as compat_urlparse
|
|
|
|
try:
|
|
import urllib.response as compat_urllib_response
|
|
except ImportError: # Python 2
|
|
import urllib as compat_urllib_response
|
|
|
|
try:
|
|
import http.cookiejar as compat_cookiejar
|
|
except ImportError: # Python 2
|
|
import cookielib as compat_cookiejar
|
|
|
|
try:
|
|
import http.cookies as compat_cookies
|
|
except ImportError: # Python 2
|
|
import Cookie as compat_cookies
|
|
|
|
try:
|
|
import html.entities as compat_html_entities
|
|
except ImportError: # Python 2
|
|
import htmlentitydefs as compat_html_entities
|
|
|
|
try:
|
|
import http.client as compat_http_client
|
|
except ImportError: # Python 2
|
|
import httplib as compat_http_client
|
|
|
|
try:
|
|
from urllib.error import HTTPError as compat_HTTPError
|
|
except ImportError: # Python 2
|
|
from urllib2 import HTTPError as compat_HTTPError
|
|
|
|
try:
|
|
from urllib.request import urlretrieve as compat_urlretrieve
|
|
except ImportError: # Python 2
|
|
from urllib import urlretrieve as compat_urlretrieve
|
|
|
|
|
|
# Interpreters whose subprocess module has no DEVNULL get a freshly opened
# handle on the platform's null device instead.
try:
    from subprocess import DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a new writable file object opened on os.path.devnull."""
        return open(os.path.devnull, 'w')
else:
    def compat_subprocess_get_DEVNULL():
        """Return the subprocess.DEVNULL sentinel."""
        return DEVNULL
|
|
|
|
try:
|
|
import http.server as compat_http_server
|
|
except ImportError:
|
|
import BaseHTTPServer as compat_http_server
|
|
|
|
# Text type: `unicode` where that builtin exists (Python 2), `str` otherwise.
try:
    unicode
except NameError:
    compat_str = str
else:  # Python 2
    compat_str = unicode
|
|
|
|
try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError:  # Python 2
    # Reuse the stdlib's ASCII-run splitter when it is available, otherwise
    # compile an equivalent pattern ourselves.
    if hasattr(compat_urllib_parse, '_asciire'):
        _asciire = compat_urllib_parse._asciire
    else:
        _asciire = re.compile('([\x00-\x7f]+)')

    # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
    # implementations from cpython 3.4.3's stdlib. Python 2's version
    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)

    def compat_urllib_parse_unquote_to_bytes(string):
        """Decode %xx escapes in *string* and return the result as bytes.

        Text input is encoded as UTF-8 first; that only matters when it
        holds unescaped non-ASCII characters, which URIs should not.
        Escapes that are not valid hex pairs are passed through unchanged.
        """
        if not string:
            # Attribute access only: raises for objects that are not
            # string-like.
            string.split
            return b''
        if isinstance(string, compat_str):
            string = string.encode('utf-8')
        pieces = string.split(b'%')
        head, escaped = pieces[0], pieces[1:]
        if not escaped:
            return string
        out = [head]
        for chunk in escaped:
            try:
                decoded = compat_urllib_parse._hextochr[chunk[:2]]
            except KeyError:
                # Not a known hex pair: keep the literal percent sign.
                out.append(b'%')
                out.append(chunk)
            else:
                out.append(decoded)
                out.append(chunk[2:])
        return b''.join(out)

    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Return *string* with %xx escapes replaced by the characters they encode.

        *encoding* and *errors* are handed to bytes.decode() when turning
        the unescaped bytes back into text; None selects the defaults
        ('utf-8' and 'replace').
        """
        if '%' not in string:
            # Attribute access only: raises for objects that are not
            # string-like.
            string.split
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # The pattern has one capturing group, so the split alternates
        # between non-ASCII text (even slots) and ASCII runs (odd slots).
        segments = _asciire.split(string)
        out = [segments[0]]
        for ascii_run, plain in zip(segments[1::2], segments[2::2]):
            raw = compat_urllib_parse_unquote_to_bytes(ascii_run)
            out.append(raw.decode(encoding, errors))
            out.append(plain)
        return ''.join(out)

    def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
        """Unquote *string* after turning every plus sign into a space.

        This is the decoding needed for HTML form values.
        """
        spaced = string.replace('+', ' ')
        return compat_urllib_parse_unquote(spaced, encoding, errors)
|
|
|
|
try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError:  # Python < 3.4
    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
        """urllib handler that serves ``data:`` URLs (RFC 2397)."""

        def data_open(self, req):
            """Decode the data URL carried by *req* and return it as a response.

            POSTed data is ignored. URL syntax:

                dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
                mediatype := [ type "/" subtype ] *( ";" parameter )
                data      := *urlchar
                parameter := attribute "=" value
            """
            base64_marker = ";base64"
            full_url = req.get_full_url()

            _, remainder = full_url.split(":", 1)
            media_type, payload = remainder.split(",", 1)

            # even base64 encoded data URLs might be quoted so unquote in any case:
            payload = compat_urllib_parse_unquote_to_bytes(payload)
            if media_type.endswith(base64_marker):
                payload = binascii.a2b_base64(payload)
                media_type = media_type[:-len(base64_marker)]

            if not media_type:
                media_type = "text/plain;charset=US-ASCII"

            header_text = "Content-type: %s\nContent-length: %d\n" % (
                media_type, len(payload))
            headers = email.message_from_string(header_text)

            return compat_urllib_response.addinfourl(
                io.BytesIO(payload), headers, full_url)
|
|
|
|
|
|
# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
# unwanted failures due to missing protocol
def compat_urllib_request_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for *url*.

    A URL that starts with ``//`` gets ``http:`` put in front of it; any
    other URL is used as given. The remaining positional and keyword
    arguments are forwarded to the Request constructor untouched.
    """
    if url.startswith('//'):
        url = 'http:%s' % url
    return compat_urllib_request.Request(url, *args, **kwargs)
|
|
|
|
|
|
# Common string base type: `basestring` where that builtin exists
# (Python 2), `str` otherwise.
try:
    basestring
except NameError:
    compat_basestring = str
else:  # Python 2
    compat_basestring = basestring
|
|
|
|
# Code point -> character: `unichr` where that builtin exists (Python 2),
# `chr` otherwise.
try:
    unichr
except NameError:
    compat_chr = chr
else:  # Python 2
    compat_chr = unichr
|
|
|
|
try:
|
|
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
|
except ImportError: # Python 2.6
|
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
|
|
|
if sys.version_info[0] >= 3:
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    etree = xml.etree.ElementTree

    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield *root* followed by all of its descendants in document order.

            Stand-in for Element.iter(); like that method it includes the
            root itself, so the root's own text gets post-processed too.
            """
            yield root
            for child in root.findall('*'):
                for sub in _etree_iter(child):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Parse *text* with *parser* (a default XMLParser if falsy) and return the root."""
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element whose byte-string attribute values are decoded as UTF-8."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse XML *text* and return the root element.

        Attribute values and element text that the parser produced as
        byte strings are decoded as UTF-8.
        """
        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
|
|
|
|
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split query string *qs* into a list of decoded (name, value) pairs.

        Fields are separated by ``&`` or ``;``. A field without ``=`` raises
        ValueError when *strict_parsing* is true; otherwise it is kept with
        an empty value if *keep_blank_values* is true and dropped if not.
        Pairs whose value is empty are dropped unless *keep_blank_values*
        is true.
        """
        def _decode(component):
            # Plus means space in form encoding; then resolve %xx escapes.
            spaced = component.replace('+', ' ')
            text = compat_urllib_parse_unquote(
                spaced, encoding=encoding, errors=errors)
            return compat_str(text)

        result = []
        for amp_part in qs.split('&'):
            for field in amp_part.split(';'):
                if not (field or strict_parsing):
                    continue
                parts = field.split('=', 1)
                if len(parts) != 2:
                    if strict_parsing:
                        raise ValueError("bad query field: %r" % (field,))
                    # Handle case of a control-name with no equal sign
                    if not keep_blank_values:
                        continue
                    parts.append('')
                if keep_blank_values or len(parts[1]):
                    name = _decode(parts[0])
                    value = _decode(parts[1])
                    result.append((name, value))
        return result

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse query string *qs* into a dict mapping each name to a list of values.

        Values for a repeated name are collected in the order they appear.
        """
        collected = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            collected.setdefault(name, []).append(value)
        return collected
|
|
|
|
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Return a shell-escaped version of the string *s*.

        Strings made up only of word characters, '-', '.', '/' and '_' are
        returned unchanged. Anything else, including the empty string, is
        wrapped in single quotes, with embedded single quotes spelled as
        '"'"'.
        """
        # \Z rather than $: `$` also matches just before a trailing newline,
        # which would let a string such as 'abc\n' through unquoted.
        if re.match(r'^[-_\w./]+\Z', s):
            return s
        else:
            return "'" + s.replace("'", "'\"'\"'") + "'"
|
|
|
|
|
|
if sys.version_info < (2, 7, 3):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """Split *s* like shlex.split(), encoding text input as UTF-8 first."""
        encoded = s.encode('utf-8') if isinstance(s, compat_str) else s
        return shlex.split(encoded, comments, posix)
else:
    compat_shlex_split = shlex.split
|
|
|
|
|
|
def compat_ord(c):
    """Return *c* itself when it is exactly an int, otherwise ord(c).

    This lets callers handle items taken from either a bytes object
    (already ints on Python 3) or a string (one-character strings).
    """
    return c if type(c) is int else ord(c)
|
|
|
|
|
|
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Return environment variable *key* decoded with the filesystem encoding.

        Falls back to *default* when the variable is unset. Falsy results
        (None, empty string) are returned as they are, without decoding.
        """
        from .utils import get_filesystem_encoding
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if os.name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            # i marks the end of the "~" / "~user" prefix.
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": current user's home.
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the user up in the password database.
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif os.name == 'nt' or os.name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            # i marks the end of the "~" / "~user" prefix.
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv returns None (it never raises KeyError) when
                # HOMEDRIVE is unset, so fall back to an empty drive here.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
|
|
|
|
|
|
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string *s*; asserts that it is a compat_str."""
        assert isinstance(s, compat_str)
        print(s)
else:
    def compat_print(s):
        """Print *s* encoded in the preferred encoding.

        Characters the encoding cannot represent are written as XML
        character references.
        """
        from .utils import preferredencoding
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
|
|
|
|
|
|
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Arguments are forwarded to subprocess.Popen; the ``input`` keyword
        is not supported. Raises subprocess.CalledProcessError, with the
        captured output stored on its ``output`` attribute, when the
        command exits with a non-zero status.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # Interpreters that lack check_output also lack Popen.args and
            # the `output` keyword of CalledProcessError, so recover the
            # command from our own arguments and attach the output by hand.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
|
|
|
|
if sys.version_info >= (3, 0) or sys.platform != 'win32':
    compat_getpass = getpass.getpass
else:
    def compat_getpass(prompt, *args, **kwargs):
        """Call getpass.getpass() with a text *prompt* encoded in the preferred encoding."""
        if isinstance(prompt, compat_str):
            from .utils import preferredencoding
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
|
|
|
|
# Old 2.6 and 2.7 releases require kwargs to be bytes
def _testfunc(x):
    pass


try:
    # Probe: does this interpreter accept this module's string literals as
    # keyword names?
    _testfunc(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a copy of *kwargs* with every key converted via bytes()."""
        converted = {}
        for key, value in kwargs.items():
            converted[bytes(key)] = value
        return converted
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* unchanged."""
        return kwargs
|
|
|
|
|
|
if sys.version_info >= (2, 7):
    compat_socket_create_connection = socket.create_connection
else:
    def compat_socket_create_connection(address, timeout, source_address=None):
        """Connect to *address* (a ``(host, port)`` pair) and return the socket.

        Each result of getaddrinfo() is tried in turn. *timeout* is applied
        to every socket, and a truthy *source_address* is bound before
        connecting. If all attempts fail, the last socket.error is
        re-raised; if getaddrinfo() yields nothing, a socket.error saying
        so is raised.
        """
        host, port = address
        last_error = None
        candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for af, socktype, proto, canonname, sa in candidates:
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
                sock.connect(sa)
                return sock
            except socket.error as exc:
                last_error = exc
                # Release the half-open socket before trying the next one.
                if sock is not None:
                    sock.close()
        if last_error is None:
            raise socket.error("getaddrinfo returns an empty list")
        raise last_error
|
|
|
|
|
|
# Fix https://github.com/rg3/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch optparse.OptionGroup.add_option if it rejects text arguments.

    A throwaway option group is used as a probe: when adding an option to
    it raises TypeError, OptionGroup.add_option is replaced by a wrapper
    that ASCII-encodes every text argument (positional or keyword value)
    before delegating to the original method. Otherwise nothing changes.
    """
    probe_parser = optparse.OptionParser()
    probe_group = optparse.OptionGroup(probe_parser, 'foo')
    try:
        probe_group.add_option('-t')
    except TypeError:
        pass
    else:
        return  # optparse is not affected

    original_add_option = optparse.OptionGroup.add_option

    def _compat_add_option(self, *args, **kwargs):
        def _to_bytes(value):
            if isinstance(value, compat_str):
                return value.encode('ascii', 'replace')
            return value

        encoded_args = [_to_bytes(arg) for arg in args]
        encoded_kwargs = dict(
            (key, _to_bytes(value)) for key, value in kwargs.items())
        return original_add_option(self, *encoded_args, **encoded_kwargs)

    optparse.OptionGroup.add_option = _compat_add_option
|
|
|
|
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_dimension_from_env(name):
        """Return environment variable *name* as an int, or None if unset, empty or not a number."""
        raw = compat_getenv(name)
        if not raw:
            return None
        try:
            return int(raw)
        except ValueError:
            # A malformed value is treated like a missing one instead of
            # crashing the caller.
            return None

    def compat_get_terminal_size(fallback=(80, 24)):
        """Return the terminal size as a ``(columns, lines)`` named tuple.

        The COLUMNS and LINES environment variables win when they hold
        positive integers. Any dimension still unknown is taken from
        ``stty size``, and from *fallback* if that command cannot be run
        or its output cannot be parsed.
        """
        columns = _terminal_dimension_from_env('COLUMNS')
        lines = _terminal_dimension_from_env('LINES')

        if columns is None or lines is None or columns <= 0 or lines <= 0:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                _lines, _columns = map(int, out.split())
            except Exception:
                # Best effort: no stty, no tty, or unparsable output.
                _columns, _lines = _terminal_size(*fallback)

            if columns is None or columns <= 0:
                columns = _columns
            if lines is None or lines <= 0:
                lines = _lines
        return _terminal_size(columns, lines)
|
|
|
|
try:
    # Probe: does itertools.count accept the `start`/`step` keywords?
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        current = start
        while True:
            yield current
            current += step
else:
    compat_itertools_count = itertools.count
|
|
|
|
if sys.version_info >= (3, 0):
|
|
from tokenize import tokenize as compat_tokenize_tokenize
|
|
else:
|
|
from tokenize import generate_tokens as compat_tokenize_tokenize
|
|
|
|
# Public names exported by `from <this module> import *`; kept sorted.
__all__ = [
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_etree_fromstring',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_request_Request',
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]
|