Implement --add-header without modifying std_headers

Closes #2526, #1614
This commit is contained in:
pukkandan 2022-01-29 03:25:35 +05:30
parent e48b3875ec
commit 8b7539d27c
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
10 changed files with 28 additions and 29 deletions

View File

@ -737,9 +737,6 @@ ## Workarounds:
--prefer-insecure Use an unencrypted connection to retrieve --prefer-insecure Use an unencrypted connection to retrieve
information about the video (Currently information about the video (Currently
supported only for YouTube) supported only for YouTube)
--user-agent UA Specify a custom user agent
--referer URL Specify a custom referer, use if the video
access is restricted to one domain
--add-header FIELD:VALUE Specify a custom HTTP header and its value, --add-header FIELD:VALUE Specify a custom HTTP header and its value,
separated by a colon ":". You can use this separated by a colon ":". You can use this
option multiple times option multiple times
@ -1866,6 +1863,8 @@ #### Redundant options
--reject-title REGEX --match-filter "title !~= (?i)REGEX" --reject-title REGEX --match-filter "title !~= (?i)REGEX"
--min-views COUNT --match-filter "view_count >=? COUNT" --min-views COUNT --match-filter "view_count >=? COUNT"
--max-views COUNT --match-filter "view_count <=? COUNT" --max-views COUNT --match-filter "view_count <=? COUNT"
--user-agent UA --add-header "User-Agent:UA"
--referer URL --add-header "Referer:URL"
#### Not recommended #### Not recommended

View File

@ -83,6 +83,7 @@
make_dir, make_dir,
make_HTTPS_handler, make_HTTPS_handler,
MaxDownloadsReached, MaxDownloadsReached,
merge_headers,
network_exceptions, network_exceptions,
number_of_digits, number_of_digits,
orderedSet, orderedSet,
@ -332,6 +333,7 @@ class YoutubeDL(object):
nocheckcertificate: Do not verify SSL certificates nocheckcertificate: Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information. prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube. At the moment, this is only supported by YouTube.
http_headers: A dictionary of custom headers to be used for all requests
proxy: URL of the proxy server to use proxy: URL of the proxy server to use
geo_verification_proxy: URL of the proxy to use for IP address verification geo_verification_proxy: URL of the proxy to use for IP address verification
on geo-restricted sites. on geo-restricted sites.
@ -647,6 +649,9 @@ def check_deprecated(param, option, suggestion):
else self.params['format'] if callable(self.params['format']) else self.params['format'] if callable(self.params['format'])
else self.build_format_selector(self.params['format'])) else self.build_format_selector(self.params['format']))
# Set http_headers defaults according to std_headers
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
self._setup_opener() self._setup_opener()
if auto_init: if auto_init:
@ -2250,8 +2255,7 @@ def restore_last_token(self):
return _build_selector_function(parsed_selector) return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict): def _calc_headers(self, info_dict):
res = std_headers.copy() res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
res.update(info_dict.get('http_headers') or {})
cookies = self._calc_cookies(info_dict) cookies = self._calc_cookies(info_dict)
if cookies: if cookies:

View File

@ -41,6 +41,7 @@
SameFileError, SameFileError,
setproctitle, setproctitle,
std_headers, std_headers,
traverse_obj,
write_string, write_string,
) )
from .update import run_update from .update import run_update
@ -75,20 +76,15 @@ def _real_main(argv=None):
parser, opts, args = parseOpts(argv) parser, opts, args = parseOpts(argv)
warnings, deprecation_warnings = [], [] warnings, deprecation_warnings = [], []
# Set user agent
if opts.user_agent is not None: if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent opts.headers.setdefault('User-Agent', opts.user_agent)
# Set referer
if opts.referer is not None: if opts.referer is not None:
std_headers['Referer'] = opts.referer opts.headers.setdefault('Referer', opts.referer)
# Custom HTTP headers
std_headers.update(opts.headers)
# Dump user agent # Dump user agent
if opts.dump_user_agent: if opts.dump_user_agent:
write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
write_string(f'{ua}\n', out=sys.stdout)
sys.exit(0) sys.exit(0)
# Batch file verification # Batch file verification
@ -767,6 +763,7 @@ def report_deprecation(val, old, new=None):
'legacyserverconnect': opts.legacy_server_connect, 'legacyserverconnect': opts.legacy_server_connect,
'nocheckcertificate': opts.no_check_certificate, 'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure, 'prefer_insecure': opts.prefer_insecure,
'http_headers': opts.headers,
'proxy': opts.proxy, 'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout, 'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround, 'bidi_workaround': opts.bidi_workaround,

View File

@ -17,7 +17,6 @@
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
std_headers,
str_or_none, str_or_none,
str_to_int, str_to_int,
traverse_obj, traverse_obj,
@ -503,7 +502,7 @@ def _extract_graphql(self, data, url):
'%s' % rhx_gis, '%s' % rhx_gis,
'', '',
'%s:%s' % (rhx_gis, csrf_token), '%s:%s' % (rhx_gis, csrf_token),
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']), '%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']),
] ]
# try all of the ways to generate a GIS query, and not only use the # try all of the ways to generate a GIS query, and not only use the

View File

@ -8,7 +8,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
std_headers,
update_url_query, update_url_query,
random_uuidv4, random_uuidv4,
try_get, try_get,
@ -70,7 +69,7 @@ def _fetch_dispatcher_config(self):
'clu': '', 'clu': '',
'wh': '1919*810', 'wh': '1919*810',
'rtm': self.iso_timestamp(), 'rtm': self.iso_timestamp(),
'ua': std_headers['User-Agent'], 'ua': self.get_param('http_headers')['User-Agent'],
}).encode('utf8')).decode('utf8').replace('\n', ''), }).encode('utf8')).decode('utf8').replace('\n', ''),
}).encode('utf8')) }).encode('utf8'))
self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization') self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')

View File

@ -16,7 +16,6 @@
ExtractorError, ExtractorError,
get_exe_version, get_exe_version,
is_outdated_version, is_outdated_version,
std_headers,
Popen, Popen,
) )
@ -208,7 +207,7 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w
replaces = self.options replaces = self.options
replaces['url'] = url replaces['url'] = url
user_agent = headers.get('User-Agent') or std_headers['User-Agent'] user_agent = headers.get('User-Agent') or self.get_param('http_headers')['User-Agent']
replaces['ua'] = user_agent.replace('"', '\\"') replaces['ua'] = user_agent.replace('"', '\\"')
replaces['jscode'] = jscode replaces['jscode'] = jscode

View File

@ -17,7 +17,6 @@
qualities, qualities,
remove_end, remove_end,
remove_start, remove_start,
std_headers,
try_get, try_get,
) )
@ -71,7 +70,7 @@ class RTVEALaCartaIE(InfoExtractor):
}] }]
def _real_initialize(self): def _real_initialize(self):
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8') user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8')
self._manager = self._download_json( self._manager = self._download_json(
'http://www.rtve.es/odin/loki/' + user_agent_b64, 'http://www.rtve.es/odin/loki/' + user_agent_b64,
None, 'Fetching manager info')['manager'] None, 'Fetching manager info')['manager']

View File

@ -28,7 +28,6 @@
parse_qs, parse_qs,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
std_headers,
str_or_none, str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
@ -758,7 +757,7 @@ def _try_album_password(self, url):
def _real_extract(self, url): def _real_extract(self, url):
url, data = unsmuggle_url(url, {}) url, data = unsmuggle_url(url, {})
headers = std_headers.copy() headers = self.get_param('http_headers').copy()
if 'http_headers' in data: if 'http_headers' in data:
headers.update(data['http_headers']) headers.update(data['http_headers'])
if 'Referer' not in headers: if 'Referer' not in headers:

View File

@ -860,17 +860,16 @@ def _dict_from_options_callback(
workarounds.add_option( workarounds.add_option(
'--user-agent', '--user-agent',
metavar='UA', dest='user_agent', metavar='UA', dest='user_agent',
help='Specify a custom user agent') help=optparse.SUPPRESS_HELP)
workarounds.add_option( workarounds.add_option(
'--referer', '--referer',
metavar='URL', dest='referer', default=None, metavar='URL', dest='referer', default=None,
help='Specify a custom referer, use if the video access is restricted to one domain', help=optparse.SUPPRESS_HELP)
)
workarounds.add_option( workarounds.add_option(
'--add-header', '--add-header',
metavar='FIELD:VALUE', dest='headers', default={}, type='str', metavar='FIELD:VALUE', dest='headers', default={}, type='str',
action='callback', callback=_dict_from_options_callback, action='callback', callback=_dict_from_options_callback,
callback_kwargs={'multiple_keys': False, 'process_key': None}, callback_kwargs={'multiple_keys': False},
help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times', help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times',
) )
workarounds.add_option( workarounds.add_option(

View File

@ -1372,7 +1372,7 @@ def http_request(self, req):
if url != url_escaped: if url != url_escaped:
req = update_Request(req, url=url_escaped) req = update_Request(req, url=url_escaped)
for h, v in std_headers.items(): for h, v in self._params.get('http_headers', std_headers).items():
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
# The dict keys are capitalized because of this bug by urllib # The dict keys are capitalized because of this bug by urllib
if h.capitalize() not in req.headers: if h.capitalize() not in req.headers:
@ -5436,3 +5436,8 @@ def _cancel_all_tasks(loop):
has_websockets = bool(compat_websockets) has_websockets = bool(compat_websockets)
def merge_headers(*dicts):
    """Merge dicts of network headers case insensitively, prioritizing the latter ones.

    Header names are normalised to Title-Case per hyphen-separated token
    (e.g. ``user-agent`` -> ``User-Agent``), so keys that differ only in case
    collapse into a single entry and the value from the last dict wins.

    @param dicts    Any number of mappings of header name -> header value
    @returns        A new dict; none of the input dicts is modified
    """
    # NB: str.capitalize() must not be used here: it lowercases everything after
    # the first character ('User-Agent' -> 'User-agent'), which breaks lookups
    # such as http_headers['User-Agent'] on the merged result
    return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}