From 37419b4f9937f11ed3ca3545a32ed3451eb734ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 12 Sep 2014 23:20:17 +0700
Subject: [PATCH] [YoutubeDL] Escape non-ASCII characters in URLs

urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991).
Work around this by replacing the request's original URL with an escaped one.
---
 youtube_dl/YoutubeDL.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 553bf559b3..99cd05e651 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -28,6 +28,7 @@
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
+    compat_urllib_parse_urlparse,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -1241,6 +1242,31 @@ def line(format, idlen=20):
 
     def urlopen(self, req):
         """ Start an HTTP download """
+
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991).
+        # Work around it by swapping in a URL whose path, query and fragment have been escaped.
+
+        url = req if isinstance(req, compat_str) else req.get_full_url()
+
+        def escape(component):
+            return compat_cookiejar.escape_path(component.encode('utf-8'))
+
+        url_parsed = compat_urllib_parse_urlparse(url)
+        url_escaped = url_parsed._replace(
+            path=escape(url_parsed.path),
+            query=escape(url_parsed.query),
+            fragment=escape(url_parsed.fragment)
+        ).geturl()
+
+        # Substitute the URL (or rebuild the Request around it) only if escaping changed it
+        if url != url_escaped:
+            if isinstance(req, compat_str):
+                req = url_escaped
+            else:
+                req = compat_urllib_request.Request(
+                    url_escaped, data=req.data, headers=req.headers,
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):