From 4ec929dc9b55a2588b4a27e64871c5bfa900bf37 Mon Sep 17 00:00:00 2001
From: huohuarong <huohuarong@gmail.com>
Date: Sat, 3 Aug 2013 10:29:58 +0800
Subject: [PATCH] sohu: use clean_html() from ..utils; return a single info dict for one-part videos

---
 youtube_dl/extractor/sohu.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 8308142211..cf0ab54788 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -7,7 +7,7 @@
 import urllib2
 
 from .common import InfoExtractor
-from ..utils import compat_urllib_request
+from ..utils import compat_urllib_request, clean_html
 
 
 class SohuIE(InfoExtractor):
@@ -22,16 +22,6 @@ class SohuIE(InfoExtractor):
         },
     }
 
-    def _clearn_html(self, string):
-        tags = re.findall(r'<.+?>', string)
-        for t in tags:
-            string = string.replace(t, ' ')
-        for i in range(2):
-            spaces = re.findall(r'\s+', string)
-            for s in spaces:
-                string = string.replace(s, ' ')
-        string = string.strip()
-        return string
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -40,7 +30,7 @@ def _real_extract(self, url):
         pattern = r'<h1 id="video-title">\n*?(.+?)\n*?</h1>'
         compiled = re.compile(pattern, re.DOTALL)
         title = self._search_regex(compiled, webpage, u'video title').strip('\t\n')
-        title = self._clearn_html(title)
+        title = clean_html(title)
         pattern = re.compile(r'var vid="(\d+)"')
         result = re.search(pattern, webpage)
         if not result:
@@ -93,5 +83,8 @@ def _real_extract(self, url):
             }
             files_info.append(info)
             time.sleep(1)
-
+        if num_of_parts == 1:
+            info =  files_info[0]
+            info['id'] = video_id
+            return info
         return files_info